docdiff 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/docdiff +16 -2
- data/docdiff.gemspec +1 -1
- data/lib/docdiff/charstring.rb +4 -3
- data/lib/docdiff/diff/unidiff.rb +0 -1
- data/lib/docdiff/encoding/en_ascii.rb +12 -39
- data/lib/docdiff/encoding/ja_eucjp.rb +12 -39
- data/lib/docdiff/encoding/ja_sjis.rb +12 -39
- data/lib/docdiff/encoding/ja_utf8.rb +12 -39
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +12 -4
- data/readme.html +18 -1
- data/readme.md +2 -1
- data/test/charstring_test.rb +3 -0
- data/test/docdiff_test.rb +4 -2
- data/test/document_test.rb +3 -0
- metadata +4 -9
- data/lib/viewdiff.rb +0 -379
- data/test/viewdiff_test.rb +0 -911
data/lib/viewdiff.rb
DELETED
|
@@ -1,379 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/ruby
|
|
2
|
-
# 2005-08-29..xx-xx-xx Hisashi Morita
|
|
3
|
-
|
|
4
|
-
require 'docdiff/difference'
|
|
5
|
-
require 'docdiff/document'
|
|
6
|
-
require 'docdiff/view'
|
|
7
|
-
require 'docdiff/charstring'
|
|
8
|
-
|
|
9
|
-
require "tempfile"
|
|
10
|
-
|
|
11
|
-
# $KCODE="e"
|
|
12
|
-
|
|
13
|
-
class String
|
|
14
|
-
def to_lines
|
|
15
|
-
scan(Regexp.new("(?:.*(?:\r\n|\r|\n|\z))", Regexp::MULTILINE))
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
class DocDiff
|
|
20
|
-
def scan_text_for_diffs(src)
|
|
21
|
-
eol = "(?:\r\n|\n|\r)"
|
|
22
|
-
pats = {
|
|
23
|
-
:classic => "(?:[0-9]+(?:,[0-9]+)?[dac][0-9]+(?:,[0-9]+)?#{eol}.+?(?=^[^-<>0-9 ]))",
|
|
24
|
-
:context => "(?:^\\*{3} .+?#{eol}--- .+?#{eol}.+?(?=^[^-+! *]|\\z))",
|
|
25
|
-
:unified => "(?:^--- .+?#{eol}^\\+{3} .+?#{eol}.+?(?=^[^-+ @]|\\z))"
|
|
26
|
-
}
|
|
27
|
-
src.scan(/(?:#{pats.values.join("|")})|(?:.*?#{eol}+)/m)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
class DiffFile < Array
|
|
31
|
-
|
|
32
|
-
def initialize(src)
|
|
33
|
-
src.extend(CharString)
|
|
34
|
-
src.encoding = CharString.guess_encoding(src)
|
|
35
|
-
src.eol = CharString.guess_eol(src)
|
|
36
|
-
@src = src
|
|
37
|
-
end
|
|
38
|
-
attr_accessor :src
|
|
39
|
-
|
|
40
|
-
def guess_diff_type(text)
|
|
41
|
-
case
|
|
42
|
-
when (/^[<>] /m).match(text) then return "classic"
|
|
43
|
-
when (/^[-+!] /m).match(text) then return "context"
|
|
44
|
-
when (/^[-+]/m).match(text) then return "unified"
|
|
45
|
-
else return "unknown"
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def anatomize
|
|
50
|
-
case guess_diff_type(@src)
|
|
51
|
-
when "classic" then return anatomize_classic(@src)
|
|
52
|
-
when "context" then return anatomize_context(@src)
|
|
53
|
-
when "unified" then return anatomize_unified(@src)
|
|
54
|
-
else
|
|
55
|
-
raise "unsupported diff format: \n#{src.inspect}"
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
module ClassicDiff
|
|
62
|
-
def eol
|
|
63
|
-
"(?:\r\n|\n|\r)"
|
|
64
|
-
end
|
|
65
|
-
def noeol
|
|
66
|
-
"(?:[^\r\n])"
|
|
67
|
-
end
|
|
68
|
-
def hunk_header
|
|
69
|
-
"(?:[0-9]+(?:,[0-9]+)?[dac][0-9]+(?:,[0-9]+)?#{eol})"
|
|
70
|
-
end
|
|
71
|
-
def del
|
|
72
|
-
"(?:^< ?#{noeol}*?#{eol})"
|
|
73
|
-
end
|
|
74
|
-
def sep
|
|
75
|
-
"(?:^---#{eol})"
|
|
76
|
-
end
|
|
77
|
-
def add
|
|
78
|
-
"(?:^> ?#{noeol}*?#{eol})"
|
|
79
|
-
end
|
|
80
|
-
def change
|
|
81
|
-
"(?:#{del}+#{sep}#{add}+)"
|
|
82
|
-
end
|
|
83
|
-
def misc
|
|
84
|
-
"(?:.*?(?:#{eol}|\z))"
|
|
85
|
-
end
|
|
86
|
-
def hunk
|
|
87
|
-
"(?:#{hunk_header}(?:#{change}|#{del}+|#{add}+))"
|
|
88
|
-
end
|
|
89
|
-
def elements
|
|
90
|
-
"(?:#{hunk}|#{misc})"
|
|
91
|
-
end
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
def anatomize_classic(src)
|
|
95
|
-
self.extend ClassicDiff
|
|
96
|
-
diffed = []
|
|
97
|
-
src_encoding = CharString.guess_encoding(src)
|
|
98
|
-
src_eol = CharString.guess_eol(src)
|
|
99
|
-
src.scan(Regexp.new(elements, Regexp::MULTILINE)){|m|
|
|
100
|
-
case
|
|
101
|
-
when /\A[0-9]/.match(m) then # hunk
|
|
102
|
-
diffed.concat(anatomize_classic_hunk(m, src_encoding, src_eol))
|
|
103
|
-
else # not hunk
|
|
104
|
-
diffed.concat(Difference.new(m.split(/^/), m.split(/^/)))
|
|
105
|
-
end
|
|
106
|
-
}
|
|
107
|
-
return diffed
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
def anatomize_classic_hunk(a_hunk, src_encoding, src_eol)
|
|
111
|
-
self.extend ClassicDiff
|
|
112
|
-
diffed = []
|
|
113
|
-
a_hunk.scan(/(#{hunk_header})(#{change}|#{del}+|#{add}+)/){|n|
|
|
114
|
-
head, body = [$1, $2].collect{|e|
|
|
115
|
-
e.extend(CharString)
|
|
116
|
-
e.encoding, e.eol = src_encoding, src_eol
|
|
117
|
-
e
|
|
118
|
-
}
|
|
119
|
-
diffed.concat(Difference.new(head.to_words, head.to_words))
|
|
120
|
-
case
|
|
121
|
-
when /d/.match(head) # del
|
|
122
|
-
diffed.concat(Difference.new(body.to_words, []))
|
|
123
|
-
when /a/.match(head) # add
|
|
124
|
-
diffed.concat(Difference.new([], body.to_words))
|
|
125
|
-
when /c/.match(head) # change (need tweak)
|
|
126
|
-
former, latter = body.split(/#{sep}/).collect{|e|
|
|
127
|
-
e.extend(CharString)
|
|
128
|
-
e.encoding, e.eol = src_encoding, src_eol
|
|
129
|
-
e
|
|
130
|
-
}
|
|
131
|
-
d = Difference.new(former.to_words, latter.to_words)
|
|
132
|
-
diffed_former = d.former_only
|
|
133
|
-
diffed_latter = d.latter_only
|
|
134
|
-
sepstr = /#{sep}/.match(body).to_s.extend(CharString)
|
|
135
|
-
sepstr.encoding, sepstr.eol = src_encoding, src_eol
|
|
136
|
-
sepelm = Difference.new(sepstr.to_words, sepstr.to_words)
|
|
137
|
-
diffed.concat(diffed_former + sepelm + diffed_latter)
|
|
138
|
-
else
|
|
139
|
-
raise "invalid hunk header: #{head}"
|
|
140
|
-
end
|
|
141
|
-
}
|
|
142
|
-
return diffed
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
module ContextDiff
|
|
146
|
-
def eol
|
|
147
|
-
"(?:\r\n|\n|\r|\\z)"
|
|
148
|
-
end
|
|
149
|
-
def noneol
|
|
150
|
-
"(?:[^\r\n])"
|
|
151
|
-
end
|
|
152
|
-
def hunk_header
|
|
153
|
-
"(?:\\*+#{eol})"
|
|
154
|
-
end
|
|
155
|
-
def hunk_subheader_former
|
|
156
|
-
"(?:^\\*+ [0-9]+,[0-9]+ \\*+#{eol})"
|
|
157
|
-
end
|
|
158
|
-
def hunk_subheader_latter
|
|
159
|
-
"(?:^-+ [0-9]+,[0-9]+ -+#{eol})"
|
|
160
|
-
end
|
|
161
|
-
def del
|
|
162
|
-
"(?:^- #{noneol}*?#{eol})"
|
|
163
|
-
end
|
|
164
|
-
def add
|
|
165
|
-
"(?:^\\+ #{noneol}*?#{eol})"
|
|
166
|
-
end
|
|
167
|
-
def change
|
|
168
|
-
"(?:^! #{noneol}*?#{eol})"
|
|
169
|
-
end
|
|
170
|
-
def misc
|
|
171
|
-
"(?:^[^-+!*]+?#{eol}+?)"
|
|
172
|
-
end
|
|
173
|
-
def any
|
|
174
|
-
"(?:#{del}+|#{add}+|#{change}+|#{misc}+)"
|
|
175
|
-
end
|
|
176
|
-
def file_header
|
|
177
|
-
"(?:[-\\*]{3} #{noneol}+?#{eol})"
|
|
178
|
-
end
|
|
179
|
-
def elements
|
|
180
|
-
"(?:#{file_header}|#{hunk_header}#{hunk_subheader_former}#{any}*?#{hunk_subheader_latter}#{any}+|#{misc}|#{noneol}+#{eol})"
|
|
181
|
-
end
|
|
182
|
-
end
|
|
183
|
-
|
|
184
|
-
def anatomize_context(src)
|
|
185
|
-
self.extend ContextDiff
|
|
186
|
-
diffed = []
|
|
187
|
-
src_encoding = CharString.guess_encoding(src)
|
|
188
|
-
src_eol = CharString.guess_eol(src)
|
|
189
|
-
src.scan(/#{elements}/m){|m|
|
|
190
|
-
case
|
|
191
|
-
when /\A\*{10,}#{eol}^\*{3} /.match(m) then # hunk
|
|
192
|
-
diffed.concat(anatomize_context_hunk(m, src_encoding, src_eol))
|
|
193
|
-
else # not hunk
|
|
194
|
-
m.extend(CharString)
|
|
195
|
-
m.encoding, m.eol = src_encoding, src_eol
|
|
196
|
-
diffed.concat(Difference.new(m.to_words, m.to_words))
|
|
197
|
-
end
|
|
198
|
-
}
|
|
199
|
-
return diffed
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
def anatomize_context_hunk(a_hunk, src_encoding, src_eol)
|
|
203
|
-
self.extend ContextDiff
|
|
204
|
-
diffed = []
|
|
205
|
-
h, sh_f, body_f, sh_l, body_l = nil
|
|
206
|
-
a_hunk.scan(/(#{hunk_header})(#{hunk_subheader_former})(.*?)(#{hunk_subheader_latter})(.*?)\z/m){|m|
|
|
207
|
-
h, sh_f, body_f, sh_l, body_l = m[0..4].collect{|he|
|
|
208
|
-
if he
|
|
209
|
-
he.extend(CharString)
|
|
210
|
-
he.encoding, he.eol = src_encoding, src_eol
|
|
211
|
-
end
|
|
212
|
-
he
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
diffed_former, diffed_latter = anatomize_context_hunk_scanbodies(body_f, body_l, src_encoding, src_eol)
|
|
216
|
-
diffed.concat(Difference.new(h.to_words, h.to_words) +
|
|
217
|
-
Difference.new(sh_f.to_words, sh_f.to_words) +
|
|
218
|
-
diffed_former +
|
|
219
|
-
Difference.new(sh_l.to_words, sh_l.to_words) +
|
|
220
|
-
diffed_latter)
|
|
221
|
-
return diffed
|
|
222
|
-
end
|
|
223
|
-
|
|
224
|
-
def anatomize_context_hunk_scanbodies(body_f, body_l, src_encoding, src_eol)
|
|
225
|
-
body_f = '' if body_f.nil?
|
|
226
|
-
body_l = '' if body_l.nil?
|
|
227
|
-
self.extend ContextDiff
|
|
228
|
-
changes_org = [[], []]
|
|
229
|
-
changes_org[0], changes_org[1] = [body_f, body_l].collect{|b|
|
|
230
|
-
b.scan(/#{change}+/).collect{|ch|
|
|
231
|
-
if ch
|
|
232
|
-
ch.extend(CharString)
|
|
233
|
-
ch.encoding, ch.eol = src_encoding, src_eol
|
|
234
|
-
end
|
|
235
|
-
ch
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
changes = changes_org.dup
|
|
239
|
-
diffed = [[], []]
|
|
240
|
-
[body_f, body_l].each_with_index{|half, i|
|
|
241
|
-
changes[0], changes[1] = changes_org[0].dup, changes_org[1].dup
|
|
242
|
-
half.scan(/(#{del}+)|(#{add}+)|(#{change}+)|(#{misc}+)/m){|elm|
|
|
243
|
-
elm_d, elm_a, elm_c, elm_cmn = elm[0..3]
|
|
244
|
-
[elm_d, elm_a, elm_c, elm_cmn].collect{|e|
|
|
245
|
-
if e
|
|
246
|
-
e.extend(CharString)
|
|
247
|
-
e.encoding, e.eol = src_encoding, src_eol
|
|
248
|
-
end
|
|
249
|
-
e
|
|
250
|
-
}
|
|
251
|
-
case
|
|
252
|
-
when elm_d then d = Difference.new(elm_d.to_words, [])
|
|
253
|
-
when elm_a then d = Difference.new([], elm_a.to_words)
|
|
254
|
-
when elm_c then d = Difference.new(changes[0].shift.to_words, changes[1].shift.to_words)
|
|
255
|
-
case i
|
|
256
|
-
when 0 then d = d.former_only
|
|
257
|
-
when 1 then d = d.latter_only
|
|
258
|
-
else raise
|
|
259
|
-
end
|
|
260
|
-
when elm_cmn then d = Difference.new(elm_cmn.to_words, elm_cmn.to_words)
|
|
261
|
-
else
|
|
262
|
-
raise "bummers!"
|
|
263
|
-
end
|
|
264
|
-
diffed[i].concat(d)
|
|
265
|
-
} # end half.scan
|
|
266
|
-
} # end each_with_index
|
|
267
|
-
return diffed
|
|
268
|
-
end
|
|
269
|
-
|
|
270
|
-
module UnifiedDiff
|
|
271
|
-
def eol
|
|
272
|
-
"(?:\r\n|\n|\r|\z)"
|
|
273
|
-
end
|
|
274
|
-
def noneol
|
|
275
|
-
"(?:[^\r\n])"
|
|
276
|
-
end
|
|
277
|
-
def hunk_header
|
|
278
|
-
"(?:@@ #{noneol}+#{eol})"
|
|
279
|
-
end
|
|
280
|
-
def del
|
|
281
|
-
"(?:^-#{noneol}*?#{eol})"
|
|
282
|
-
end
|
|
283
|
-
def add
|
|
284
|
-
"(?:^\\+#{noneol}*?#{eol})"
|
|
285
|
-
end
|
|
286
|
-
def change
|
|
287
|
-
"(?:#{del}+#{add}+)"
|
|
288
|
-
end
|
|
289
|
-
def common
|
|
290
|
-
"(?:^ #{noneol}*?#{eol})"
|
|
291
|
-
end
|
|
292
|
-
def misc
|
|
293
|
-
"(?:^[^-+]+?#{eol}+?)"
|
|
294
|
-
end
|
|
295
|
-
def any
|
|
296
|
-
"(?:#{del}+|#{add}+|#{change}+|#{common}+|#{misc}+)"
|
|
297
|
-
end
|
|
298
|
-
def file_header
|
|
299
|
-
"(?:^[^-+@ ]#{noneol}+#{eol}(?:^[-+]{3} #{noneol}+#{eol}){2})"
|
|
300
|
-
end
|
|
301
|
-
def elements
|
|
302
|
-
"(?:#{file_header}|#{hunk_header}#{any}+?|#{misc}|#{noneol}+#{eol})"
|
|
303
|
-
end
|
|
304
|
-
end
|
|
305
|
-
|
|
306
|
-
def anatomize_unified(src)
|
|
307
|
-
self.extend UnifiedDiff
|
|
308
|
-
diffed = []
|
|
309
|
-
src_encoding = CharString.guess_encoding(src)
|
|
310
|
-
src_eol = CharString.guess_eol(src)
|
|
311
|
-
src.scan(/#{elements}/m){|m|
|
|
312
|
-
case
|
|
313
|
-
when /\A@@ /.match(m) then # hunk
|
|
314
|
-
diffed.concat(anatomize_unified_hunk(m.to_s, src_encoding, src_eol))
|
|
315
|
-
else # not hunk
|
|
316
|
-
m.extend(CharString)
|
|
317
|
-
m.encoding, m.eol = src_encoding, src_eol
|
|
318
|
-
diffed.concat(Difference.new(m.to_words, m.to_words))
|
|
319
|
-
end
|
|
320
|
-
}
|
|
321
|
-
return diffed
|
|
322
|
-
end
|
|
323
|
-
|
|
324
|
-
def anatomize_unified_hunk(a_hunk, src_encoding, src_eol)
|
|
325
|
-
self.extend UnifiedDiff
|
|
326
|
-
diffed = []
|
|
327
|
-
a_hunk.scan(/(#{hunk_header})(#{any}+#{eol}?)/m){|m|
|
|
328
|
-
head, body = m[0], m[1]
|
|
329
|
-
[head, body].collect{|e|
|
|
330
|
-
e.extend(CharString)
|
|
331
|
-
e.encoding, e.eol = src_encoding, src_eol
|
|
332
|
-
}
|
|
333
|
-
diffed.concat(Difference.new(head.to_words, head.to_words))
|
|
334
|
-
body.scan(/(#{del}+)(#{add}+)|(#{del}+#{eol}?)|(#{add}+)|(#{common}+#{eol}?)|(.*#{eol}?)/m){|m|
|
|
335
|
-
cf, cl, d, a, cmn, msc = m[0..5]
|
|
336
|
-
[cf, cl, d, a, cmn, msc].collect{|e|
|
|
337
|
-
next if e.nil?
|
|
338
|
-
e.extend(CharString)
|
|
339
|
-
e.encoding, e.eol = src_encoding, src_eol
|
|
340
|
-
}
|
|
341
|
-
case
|
|
342
|
-
when (cf and cl) then
|
|
343
|
-
Difference.new(cf.to_words, cl.to_words).each{|e|
|
|
344
|
-
case e.first
|
|
345
|
-
when :change_elt then diffed << [:change_elt, e[1], nil]
|
|
346
|
-
diffed << [:change_elt, nil, e[2]]
|
|
347
|
-
when :del_elt then diffed << [:change_elt, e[1], nil]
|
|
348
|
-
when :add_elt then diffed << [:change_elt, nil, e[2]]
|
|
349
|
-
when :common_elt_elt then diffed << e
|
|
350
|
-
else raise "bummers! (#{e.inspect})"
|
|
351
|
-
end
|
|
352
|
-
}
|
|
353
|
-
when d then diffed.concat(Difference.new(d.to_words, []))
|
|
354
|
-
when a then diffed.concat(Difference.new([], a.to_words))
|
|
355
|
-
when cmn then diffed.concat(Difference.new(cmn.to_words, cmn.to_words))
|
|
356
|
-
when msc then diffed.concat(Difference.new(msc.to_words, msc.to_words))
|
|
357
|
-
else raise "bummers! (#{m.inspect})"
|
|
358
|
-
end
|
|
359
|
-
}
|
|
360
|
-
}
|
|
361
|
-
return diffed
|
|
362
|
-
end
|
|
363
|
-
end # class DocDiff
|
|
364
|
-
|
|
365
|
-
if $0 == __FILE__
|
|
366
|
-
|
|
367
|
-
src = ARGF.read
|
|
368
|
-
enc, eol = DocDiff::CharString.guess_encoding(src),
|
|
369
|
-
DocDiff::CharString.guess_eol(src)
|
|
370
|
-
DocDiff.new.scan_text_for_diffs(src).each{|fragment|
|
|
371
|
-
if DocDiff::DiffFile.new('').guess_diff_type(fragment) == "unknown"
|
|
372
|
-
print fragment
|
|
373
|
-
else
|
|
374
|
-
diff = DocDiff::DiffFile.new(fragment).anatomize
|
|
375
|
-
print DocDiff::View.new(diff, enc, eol).to_tty
|
|
376
|
-
end
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
end
|