docdiff 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/viewdiff.rb DELETED
@@ -1,379 +0,0 @@
1
- #!/usr/bin/ruby
2
- # 2005-08-29..xx-xx-xx Hisashi Morita
3
-
4
- require 'docdiff/difference'
5
- require 'docdiff/document'
6
- require 'docdiff/view'
7
- require 'docdiff/charstring'
8
-
9
- require "tempfile"
10
-
11
- # $KCODE="e"
12
-
13
# Line-splitting helper added to every String.
class String
  # Splits the receiver into lines, keeping each line terminator attached.
  #
  # CRLF, a lone CR and a lone LF all count as terminators.  A final line
  # without a terminator is returned as is; an empty string yields [].
  #
  # @return [Array<String>] the lines in order, terminators included
  def to_lines
    # The earlier pattern was built from a double-quoted string, where "\z"
    # is simply the letter "z", and was compiled with MULTILINE, which lets
    # ".*" run across line breaks.  Together they returned one large chunk
    # instead of individual lines, so the pattern is spelled out explicitly:
    # either a (possibly empty) line with its terminator, or a non-empty
    # unterminated line at the very end of the string.
    scan(/[^\r\n]*(?:\r\n|\r|\n)|[^\r\n]+\z/)
  end
end
18
-
19
- class DocDiff
20
# Splits +src+ into fragments: each embedded diff (classic, context or
# unified) becomes one fragment, and any other text is returned line by line
# (a line together with the blank lines that follow it).
#
# @param src [String] text that may contain diffs mixed with other text
# @return [Array<String>] the fragments, in the order they appear in +src+
def scan_text_for_diffs(src)
  eol = "(?:\r\n|\n|\r)"
  # Each pattern runs lazily from the diff's first line up to the next line
  # that cannot belong to that diff format, or up to the end of the input.
  # The classic pattern used to lack the end-of-input alternative, so a
  # classic diff at the end of the text was broken up line by line.
  pats = [
    # classic: "1,2c3,4" style header followed by "<", ">", "---" lines
    "(?:[0-9]+(?:,[0-9]+)?[dac][0-9]+(?:,[0-9]+)?#{eol}.+?(?=^[^-<>0-9 ]|\\z))",
    # context: "*** file" / "--- file" header pair followed by hunks
    "(?:^\\*{3} .+?#{eol}--- .+?#{eol}.+?(?=^[^-+! *]|\\z))",
    # unified: "--- file" / "+++ file" header pair followed by hunks
    "(?:^--- .+?#{eol}^\\+{3} .+?#{eol}.+?(?=^[^-+ @]|\\z))"
  ]
  src.scan(/(?:#{pats.join("|")})|(?:.*?#{eol}+)/m)
end
29
-
30
# Holds the text of one diff and dispatches it to the parser that matches its
# format.
class DiffFile < Array

  # @param src [String] diff text.  It is extended in place with CharString
  #   and tagged with the encoding and line terminator that CharString
  #   guesses for it.
  def initialize(src)
    src.extend(CharString)
    src.encoding = CharString.guess_encoding(src)
    src.eol = CharString.guess_eol(src)
    @src = src
  end
  attr_accessor :src

  # Guesses which diff format +text+ is written in.
  #
  # @param text [String] a diff, or any other text
  # @return [String] "classic", "context", "unified" or "unknown"
  def guess_diff_type(text)
    case text
    when /^[<>] / then "classic"
    # A "@@ " hunk header only occurs in unified diffs.  It has to be checked
    # before the context test below, because a unified line whose content
    # starts with a space (e.g. removed indented code, "-  foo") also looks
    # like a context-format "- " line and used to be reported as "context".
    when /^@@ / then "unified"
    when /^[-+!] / then "context"
    when /^[-+]/ then "unified"
    else "unknown"
    end
  end

  # Parses the stored diff text with the parser for its guessed format.
  #
  # NOTE(review): anatomize_classic, anatomize_context and anatomize_unified
  # are not defined in this class; confirm that they are reachable from a
  # DiffFile instance.
  #
  # @return the result of the format-specific anatomize_* method
  # @raise [RuntimeError] if the format is not recognised
  def anatomize
    case guess_diff_type(@src)
    when "classic" then anatomize_classic(@src)
    when "context" then anatomize_context(@src)
    when "unified" then anatomize_unified(@src)
    else
      raise "unsupported diff format: \n#{src.inspect}"
    end
  end

end
60
-
61
# Regular-expression source fragments describing the classic ("normal") diff
# format.  Every method returns a String meant to be interpolated into a
# larger pattern and compiled by the caller.
module ClassicDiff
  # A line terminator: CRLF, LF or CR.
  def eol
    "(?:\r\n|\n|\r)"
  end
  # Any single character that is not part of a line terminator.
  def noeol
    "(?:[^\r\n])"
  end
  # Hunk header line such as "3d2", "1,2c3,4" or "5a6,7".
  def hunk_header
    "(?:[0-9]+(?:,[0-9]+)?[dac][0-9]+(?:,[0-9]+)?#{eol})"
  end
  # One deleted line ("< text").
  def del
    "(?:^< ?#{noeol}*?#{eol})"
  end
  # The "---" line separating the old and new halves of a change.
  def sep
    "(?:^---#{eol})"
  end
  # One added line ("> text").
  def add
    "(?:^> ?#{noeol}*?#{eol})"
  end
  # Deleted lines, the separator, then added lines.
  def change
    "(?:#{del}+#{sep}#{add}+)"
  end
  # Any other line: a (possibly empty) line with its terminator, or a final
  # non-empty line that has no terminator.
  #
  # The end-of-input case used to be written as "\z" inside a double-quoted
  # string, which Ruby reads as the plain letter "z"; lines were therefore
  # cut at the first "z" and an unterminated last line was dropped.
  def misc
    "(?:#{noeol}*#{eol}|#{noeol}+\\z)"
  end
  # A complete hunk: header followed by a change, deletions or additions.
  def hunk
    "(?:#{hunk_header}(?:#{change}|#{del}+|#{add}+))"
  end
  # Everything a classic diff is scanned into: hunks and other lines.
  def elements
    "(?:#{hunk}|#{misc})"
  end
end
93
-
94
# Breaks a classic-format diff into a flat list of difference elements.
#
# Hunks (fragments starting with a digit) are handed to
# anatomize_classic_hunk; every other fragment is recorded as text that is
# identical on both sides.
#
# @param src [String] text of a classic diff
# @return [Array] the accumulated difference elements
def anatomize_classic(src)
  extend ClassicDiff
  encoding = CharString.guess_encoding(src)
  line_end = CharString.guess_eol(src)
  pattern = Regexp.new(elements, Regexp::MULTILINE)
  result = []
  src.scan(pattern) do |fragment|
    if fragment =~ /\A[0-9]/
      # hunk
      result.concat(anatomize_classic_hunk(fragment, encoding, line_end))
    else
      # not a hunk: same lines on both sides
      result.concat(Difference.new(fragment.split(/^/), fragment.split(/^/)))
    end
  end
  result
end
109
-
110
# Breaks one classic-format hunk into difference elements.
#
# The header is recorded as unchanged text; the body is recorded as a
# deletion, an addition or a change depending on the letter in the header.
#
# @param a_hunk [String] hunk text (header plus body)
# @param src_encoding encoding tag applied to every extracted string
# @param src_eol line-terminator tag applied to every extracted string
# @return [Array] difference elements for the hunk
# @raise [RuntimeError] if the header contains none of "d", "a", "c"
def anatomize_classic_hunk(a_hunk, src_encoding, src_eol)
  extend ClassicDiff
  # Tags a string so that CharString's word splitting can be used on it.
  tag = lambda do |str|
    str.extend(CharString)
    str.encoding, str.eol = src_encoding, src_eol
    str
  end
  result = []
  a_hunk.scan(/(#{hunk_header})(#{change}|#{del}+|#{add}+)/) do |head, body|
    tag.call(head)
    tag.call(body)
    result.concat(Difference.new(head.to_words, head.to_words))
    if head =~ /d/
      # deletion
      result.concat(Difference.new(body.to_words, []))
    elsif head =~ /a/
      # addition
      result.concat(Difference.new([], body.to_words))
    elsif head =~ /c/
      # change (need tweak): diff the two halves against each other and keep
      # the separator line between them as unchanged text
      former, latter = body.split(/#{sep}/).collect { |half| tag.call(half) }
      delta = Difference.new(former.to_words, latter.to_words)
      former_part = delta.former_only
      latter_part = delta.latter_only
      separator = tag.call(/#{sep}/.match(body).to_s)
      separator_part = Difference.new(separator.to_words, separator.to_words)
      result.concat(former_part + separator_part + latter_part)
    else
      raise "invalid hunk header: #{head}"
    end
  end
  result
end
144
-
145
# Regular-expression source fragments describing the context diff format.
# Every method returns a String meant to be interpolated into a larger
# pattern and compiled by the caller.
module ContextDiff
  # A line terminator (CRLF, LF or CR) or the end of the input.
  def eol
    "(?:\r\n|\n|\r|\\z)"
  end
  # Any single character that is not part of a line terminator.
  def noneol
    "(?:[^\r\n])"
  end
  # The line of asterisks that opens a hunk.
  def hunk_header
    "(?:\\*+#{eol})"
  end
  # Old-file range line, e.g. "*** 1,7 ****".  The ",end" part is optional
  # because context diffs print a single number for a one-line range
  # ("*** 3 ****").
  def hunk_subheader_former
    "(?:^\\*+ [0-9]+(?:,[0-9]+)? \\*+#{eol})"
  end
  # New-file range line, e.g. "--- 1,8 ----" or "--- 3 ----".
  def hunk_subheader_latter
    "(?:^-+ [0-9]+(?:,[0-9]+)? -+#{eol})"
  end
  # One deleted line ("- text").
  def del
    "(?:^- #{noneol}*?#{eol})"
  end
  # One added line ("+ text").
  def add
    "(?:^\\+ #{noneol}*?#{eol})"
  end
  # One changed line ("! text").
  def change
    "(?:^! #{noneol}*?#{eol})"
  end
  # Text that does not start with one of the marker characters - + ! *.
  def misc
    "(?:^[^-+!*]+?#{eol}+?)"
  end
  # A run of lines of one kind.
  def any
    "(?:#{del}+|#{add}+|#{change}+|#{misc}+)"
  end
  # File header line ("*** name ..." or "--- name ...").
  def file_header
    "(?:[-\\*]{3} #{noneol}+?#{eol})"
  end
  # Everything a context diff is scanned into: file headers, whole hunks,
  # miscellaneous text and, as a last resort, any single line.
  def elements
    "(?:#{file_header}|#{hunk_header}#{hunk_subheader_former}#{any}*?#{hunk_subheader_latter}#{any}+|#{misc}|#{noneol}+#{eol})"
  end
end
183
-
184
# Breaks a context-format diff into a flat list of difference elements.
#
# Fragments that open with a row of ten or more asterisks followed by a
# "*** " line are treated as hunks and handed to anatomize_context_hunk;
# every other fragment is recorded as text identical on both sides.
#
# @param src [String] text of a context diff
# @return [Array] the accumulated difference elements
def anatomize_context(src)
  extend ContextDiff
  encoding = CharString.guess_encoding(src)
  line_end = CharString.guess_eol(src)
  result = []
  src.scan(/#{elements}/m) do |fragment|
    if fragment =~ /\A\*{10,}#{eol}^\*{3} /
      # hunk
      result.concat(anatomize_context_hunk(fragment, encoding, line_end))
    else
      # not a hunk
      fragment.extend(CharString)
      fragment.encoding, fragment.eol = encoding, line_end
      result.concat(Difference.new(fragment.to_words, fragment.to_words))
    end
  end
  result
end
201
-
202
# Breaks one context-format hunk into difference elements.
#
# The hunk is cut into five pieces: the asterisk header, the old-file range
# line, the old-file body, the new-file range line and the new-file body.
# Header and range lines are recorded as unchanged text; the two bodies are
# analysed by anatomize_context_hunk_scanbodies.
#
# @param a_hunk [String] hunk text
# @param src_encoding encoding tag applied to every extracted string
# @param src_eol line-terminator tag applied to every extracted string
# @return [Array] difference elements for the hunk
def anatomize_context_hunk(a_hunk, src_encoding, src_eol)
  extend ContextDiff
  result = []
  header = former_range = former_body = latter_range = latter_body = nil
  a_hunk.scan(/(#{hunk_header})(#{hunk_subheader_former})(.*?)(#{hunk_subheader_latter})(.*?)\z/m) do |pieces|
    pieces[0..4].each do |piece|
      next unless piece
      piece.extend(CharString)
      piece.encoding, piece.eol = src_encoding, src_eol
    end
    header, former_range, former_body, latter_range, latter_body = pieces[0..4]
  end
  former_diff, latter_diff =
    anatomize_context_hunk_scanbodies(former_body, latter_body, src_encoding, src_eol)
  result.concat(
    Difference.new(header.to_words, header.to_words) +
    Difference.new(former_range.to_words, former_range.to_words) +
    former_diff +
    Difference.new(latter_range.to_words, latter_range.to_words) +
    latter_diff
  )
  result
end
223
-
224
# Analyses the old-file and new-file bodies of one context-format hunk.
#
# Runs of "!" (changed) lines are paired by position: the first run in the
# old body is diffed against the first run in the new body, the second
# against the second, and so on.  Deleted, added and other lines are
# recorded on their own.
#
# @param body_f [String, nil] old-file body; nil is treated as empty
# @param body_l [String, nil] new-file body; nil is treated as empty
# @param src_encoding encoding tag applied to every extracted string
# @param src_eol line-terminator tag applied to every extracted string
# @return [Array] two-element array: elements for the old body, then
#   elements for the new body
def anatomize_context_hunk_scanbodies(body_f, body_l, src_encoding, src_eol)
  body_f = '' if body_f.nil?
  body_l = '' if body_l.nil?
  self.extend ContextDiff
  # Collect the runs of changed lines of each body, in order of appearance.
  changes_org = [[], []]
  changes_org[0], changes_org[1] = [body_f, body_l].collect{|b|
    b.scan(/#{change}+/).collect{|ch|
      if ch
        ch.extend(CharString)
        ch.encoding, ch.eol = src_encoding, src_eol
      end
      ch
    }
  }
  changes = changes_org.dup
  diffed = [[], []]
  [body_f, body_l].each_with_index{|half, i|
    # Start each body with fresh copies of both queues, so that the n-th
    # changed run met in this body is paired with the n-th run of the other.
    changes[0], changes[1] = changes_org[0].dup, changes_org[1].dup
    half.scan(/(#{del}+)|(#{add}+)|(#{change}+)|(#{misc}+)/m){|elm|
      # Exactly one of the four captures is set for each match.
      elm_d, elm_a, elm_c, elm_cmn = elm[0..3]
      [elm_d, elm_a, elm_c, elm_cmn].collect{|e|
        if e
          e.extend(CharString)
          e.encoding, e.eol = src_encoding, src_eol
        end
        e
      }
      case
      when elm_d then d = Difference.new(elm_d.to_words, [])
      when elm_a then d = Difference.new([], elm_a.to_words)
      # Diff the next pair of changed runs, then keep only the side that
      # belongs to the body being scanned (0: old, 1: new).
      when elm_c then d = Difference.new(changes[0].shift.to_words, changes[1].shift.to_words)
        case i
        when 0 then d = d.former_only
        when 1 then d = d.latter_only
        else raise
        end
      when elm_cmn then d = Difference.new(elm_cmn.to_words, elm_cmn.to_words)
      else
        raise "bummers!"
      end
      diffed[i].concat(d)
    } # end half.scan
  } # end each_with_index
  return diffed
end
269
-
270
# Regular-expression source fragments describing the unified diff format.
# Every method returns a String meant to be interpolated into a larger
# pattern and compiled by the caller.
module UnifiedDiff
  # A line terminator (CRLF, LF or CR) or the end of the input.
  #
  # The end-of-input anchor must be written "\\z": inside a double-quoted
  # string "\z" is the plain letter "z", which made every line pattern below
  # stop at the first "z" and never match an unterminated last line.
  def eol
    "(?:\r\n|\n|\r|\\z)"
  end
  # Any single character that is not part of a line terminator.
  def noneol
    "(?:[^\r\n])"
  end
  # Hunk header line ("@@ -1,3 +1,4 @@").
  def hunk_header
    "(?:@@ #{noneol}+#{eol})"
  end
  # One deleted line ("-text").
  def del
    "(?:^-#{noneol}*?#{eol})"
  end
  # One added line ("+text").
  def add
    "(?:^\\+#{noneol}*?#{eol})"
  end
  # Deleted lines immediately followed by added lines.
  def change
    "(?:#{del}+#{add}+)"
  end
  # One unchanged line (" text").
  def common
    "(?:^ #{noneol}*?#{eol})"
  end
  # Text that does not start with - or +.
  def misc
    "(?:^[^-+]+?#{eol}+?)"
  end
  # A run of lines of one kind.
  def any
    "(?:#{del}+|#{add}+|#{change}+|#{common}+|#{misc}+)"
  end
  # A line of other text followed by the two "--- " / "+++ " file lines.
  def file_header
    "(?:^[^-+@ ]#{noneol}+#{eol}(?:^[-+]{3} #{noneol}+#{eol}){2})"
  end
  # Everything a unified diff is scanned into: file headers, hunks,
  # miscellaneous text and, as a last resort, any single line.
  def elements
    "(?:#{file_header}|#{hunk_header}#{any}+?|#{misc}|#{noneol}+#{eol})"
  end
end
305
-
306
# Breaks a unified-format diff into a flat list of difference elements.
#
# Fragments starting with "@@ " are treated as hunks and handed to
# anatomize_unified_hunk; every other fragment is recorded as text identical
# on both sides.
#
# @param src [String] text of a unified diff
# @return [Array] the accumulated difference elements
def anatomize_unified(src)
  extend UnifiedDiff
  encoding = CharString.guess_encoding(src)
  line_end = CharString.guess_eol(src)
  result = []
  src.scan(/#{elements}/m) do |fragment|
    if fragment =~ /\A@@ /
      # hunk
      result.concat(anatomize_unified_hunk(fragment.to_s, encoding, line_end))
    else
      # not a hunk
      fragment.extend(CharString)
      fragment.encoding, fragment.eol = encoding, line_end
      result.concat(Difference.new(fragment.to_words, fragment.to_words))
    end
  end
  result
end
323
-
324
# Breaks one unified-format hunk into difference elements.
#
# The "@@" header is recorded as unchanged text.  The body is then scanned
# into runs: deleted lines directly followed by added lines are diffed
# against each other, while lone deletions, lone additions, unchanged lines
# and anything else are recorded on their own.
#
# @param a_hunk [String] hunk text
# @param src_encoding encoding tag applied to every extracted string
# @param src_eol line-terminator tag applied to every extracted string
# @return [Array] difference elements for the hunk
# @raise [RuntimeError] on an unexpected element type or an empty match
def anatomize_unified_hunk(a_hunk, src_encoding, src_eol)
  extend UnifiedDiff
  # Tags a string so that CharString's word splitting can be used on it.
  tag = lambda do |str|
    str.extend(CharString)
    str.encoding, str.eol = src_encoding, src_eol
  end
  result = []
  a_hunk.scan(/(#{hunk_header})(#{any}+#{eol}?)/m) do |head, body|
    tag.call(head)
    tag.call(body)
    result.concat(Difference.new(head.to_words, head.to_words))
    body.scan(/(#{del}+)(#{add}+)|(#{del}+#{eol}?)|(#{add}+)|(#{common}+#{eol}?)|(.*#{eol}?)/m) do |parts|
      changed_from, changed_to, removed, added, unchanged, other = parts[0..5]
      parts[0..5].each { |part| tag.call(part) unless part.nil? }
      if changed_from && changed_to
        # Deletions followed by additions: every differing piece is emitted
        # as a one-sided :change_elt so both sides render as a change.
        Difference.new(changed_from.to_words, changed_to.to_words).each do |entry|
          case entry.first
          when :change_elt
            result << [:change_elt, entry[1], nil]
            result << [:change_elt, nil, entry[2]]
          when :del_elt
            result << [:change_elt, entry[1], nil]
          when :add_elt
            result << [:change_elt, nil, entry[2]]
          when :common_elt_elt
            result << entry
          else
            raise "bummers! (#{entry.inspect})"
          end
        end
      elsif removed
        result.concat(Difference.new(removed.to_words, []))
      elsif added
        result.concat(Difference.new([], added.to_words))
      elsif unchanged
        result.concat(Difference.new(unchanged.to_words, unchanged.to_words))
      elsif other
        result.concat(Difference.new(other.to_words, other.to_words))
      else
        raise "bummers! (#{parts.inspect})"
      end
    end
  end
  result
end
363
- end # class DocDiff
364
-
365
# When run as a script: read all input, split it into diff and non-diff
# fragments, print non-diff text unchanged and print each diff through
# DocDiff::View#to_tty.
if __FILE__ == $0

  input = ARGF.read
  encoding = DocDiff::CharString.guess_encoding(input)
  line_end = DocDiff::CharString.guess_eol(input)
  DocDiff.new.scan_text_for_diffs(input).each do |fragment|
    if DocDiff::DiffFile.new('').guess_diff_type(fragment) == "unknown"
      print fragment
    else
      parsed = DocDiff::DiffFile.new(fragment).anatomize
      print DocDiff::View.new(parsed, encoding, line_end).to_tty
    end
  end

end