docdiff 0.5.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/viewdiff.rb DELETED
@@ -1,375 +0,0 @@
1
- #!/usr/bin/ruby
2
- # 2005-08-29..xx-xx-xx Hisashi Morita
3
-
4
- require 'docdiff/difference'
5
- require 'docdiff/document'
6
- require 'docdiff/view'
7
- require 'docdiff/charstring'
8
-
9
- require "tempfile"
10
-
11
- # $KCODE="e"
12
-
13
class String
  # Splits the receiver into lines, keeping every line terminator.
  #
  # "\r\n", "\r" and "\n" are all recognised as terminators. A final line
  # without a terminator is returned as the last element, and an empty
  # string yields an empty array.
  #
  # The previous pattern was built from a double-quoted string, in which
  # "\z" collapses to a plain "z", and was compiled with Regexp::MULTILINE,
  # which lets ".*" run across line terminators; as a result the text was
  # never split line by line.
  #
  # @return [Array<String>] the lines, terminators included
  def to_lines
    scan(/[^\r\n]*(?:\r\n|\r|\n)|[^\r\n]+/)
  end
end
18
-
19
# Splits +src+ into fragments so that diffs embedded in ordinary text can be
# handled separately from the text around them.
#
# Each classic, context or unified diff found in +src+ is returned as one
# fragment. Everything else is returned in line-sized pieces: one line plus
# the run of line terminators that follows it.
#
# Two gaps of the earlier pattern are closed here:
# * a last line without a terminator is returned instead of being dropped;
# * a classic diff that runs to the end of the input is captured as one
#   fragment, exactly like the context and unified patterns already did.
#
# @param src [String] text that may contain diffs
# @return [Array<String>] the fragments, in input order
def scan_text_for_diffs(src)
  eol = "(?:\r\n|\n|\r)"
  diff_patterns = [
    # classic: a header such as "3,4c3,5", then everything up to the next
    # line that cannot belong to a classic diff (or the end of the input)
    "(?:[0-9]+(?:,[0-9]+)?[dac][0-9]+(?:,[0-9]+)?#{eol}.+?(?=^[^-<>0-9 ]|\\z))",
    # context: "*** file" / "--- file" header pair plus the following body
    "(?:^\\*{3} .+?#{eol}--- .+?#{eol}.+?(?=^[^-+! *]|\\z))",
    # unified: "--- file" / "+++ file" header pair plus the following body
    "(?:^--- .+?#{eol}^\\+{3} .+?#{eol}.+?(?=^[^-+ @]|\\z))"
  ]
  # The last alternative only applies when no terminator is left in the
  # input, i.e. to an unterminated final line.
  src.scan(/(?:#{diff_patterns.join("|")})|(?:.*?#{eol}+)|(?:.+\z)/m)
end
28
-
29
# Holds the text of one diff and turns it into a list of difference
# elements via #anatomize.
class DiffFile < Array
  # The diff text handed to the constructor.
  attr_accessor :src

  # @param src [String] diff text; it is extended with CharString in place
  #   and tagged with the encoding and end-of-line style that CharString
  #   (defined elsewhere in the project) guesses for it
  def initialize(src)
    super()
    src.extend(CharString)
    src.encoding = CharString.guess_encoding(src)
    src.eol = CharString.guess_eol(src)
    @src = src
  end

  # Guesses which diff flavour +text+ is written in from its line prefixes.
  #
  # A line starting with "- ", "+ " or "! " normally indicates a context
  # diff, but the first two also occur in unified diffs whenever the changed
  # text itself begins with a space (any indented line). Such text is
  # reported as "unified" when it carries "@@ " hunk headers and no context
  # hunk sub-header; previously it was always reported as "context".
  #
  # @param text [String] the text to classify
  # @return [String] "classic", "context", "unified" or "unknown"
  def guess_diff_type(text)
    return "classic" if /^[<>] / =~ text
    if /^[-+!] / =~ text
      return unified_hunk_headers_only?(text) ? "unified" : "context"
    end
    return "unified" if /^[-+]/ =~ text

    "unknown"
  end

  # Parses the stored diff text with the parser that matches its flavour.
  #
  # @return [Array] whatever the selected anatomize_* method returns
  # @raise [RuntimeError] when the flavour cannot be determined
  def anatomize
    case guess_diff_type(@src)
    when "classic" then anatomize_classic(@src)
    when "context" then anatomize_context(@src)
    when "unified" then anatomize_unified(@src)
    else
      raise "unsupported diff format: \n#{src.inspect}"
    end
  end

  private

  # True when +text+ has unified hunk headers ("@@ ...") and no context hunk
  # sub-header ("*** 1,2 ****").
  def unified_hunk_headers_only?(text)
    (/^@@ / =~ text) && (/^\*{3} [0-9]/ !~ text)
  end
end
59
-
60
# Regular-expression source fragments describing the classic ("normal")
# diff format. Every method returns a String meant to be interpolated into
# a Regexp; the methods build on each other.
module ClassicDiff
  # One line terminator.
  def eol
    "(?:\r\n|\n|\r)"
  end

  # One character that is not part of a line terminator.
  def noeol
    "(?:[^\r\n])"
  end

  # Hunk header line such as "3d2", "5a6,7" or "1,2c1,3".
  def hunk_header
    "(?:[0-9]+(?:,[0-9]+)?[dac][0-9]+(?:,[0-9]+)?#{eol})"
  end

  # One line removed from the former text ("< ...").
  def del
    "(?:^< ?#{noeol}*?#{eol})"
  end

  # The "---" line between the two halves of a change hunk.
  def sep
    "(?:^---#{eol})"
  end

  # One line added in the latter text ("> ...").
  def add
    "(?:^> ?#{noeol}*?#{eol})"
  end

  # Body of a change hunk: removed lines, separator, added lines.
  def change
    "(?:#{del}+#{sep}#{add}+)"
  end

  # Any single line that is not part of a hunk, including a final line that
  # has no terminator.
  #
  # The end-of-input anchor is spelled "\\z" on purpose: inside a
  # double-quoted string "\z" is just the letter "z", which used to cut
  # lines at every "z". The second alternative requires at least one
  # character so that scanning never yields an empty trailing match.
  def misc
    "(?:#{noeol}*#{eol}|#{noeol}+\\z)"
  end

  # A complete hunk: header followed by a change, deletion or addition body.
  def hunk
    "(?:#{hunk_header}(?:#{change}|#{del}+|#{add}+))"
  end

  # Everything a classic diff is cut into: hunks and other lines.
  def elements
    "(?:#{hunk}|#{misc})"
  end
end
92
-
93
# Parses a classic-format diff into a flat list of difference elements.
#
# The text is cut into hunks and other lines using ClassicDiff#elements.
# Hunks are handed to anatomize_classic_hunk; every other piece is emitted
# as text that is identical on both sides.
#
# A piece is treated as a hunk only when it really starts with a complete
# hunk (header plus body). Testing just for a leading digit sent ordinary
# lines such as "2005 ..." to the hunk parser, which found nothing in them
# and returned an empty list, so those lines disappeared from the result.
#
# @param src [String] classic diff text
# @return [Array] difference elements in input order
def anatomize_classic(src)
  extend ClassicDiff
  src_encoding = CharString.guess_encoding(src)
  src_eol = CharString.guess_eol(src)
  hunk_start = /\A#{hunk}/
  diffed = []
  src.scan(Regexp.new(elements, Regexp::MULTILINE)) do |piece|
    if hunk_start =~ piece
      diffed.concat(anatomize_classic_hunk(piece, src_encoding, src_eol))
    else
      # not a hunk: the same lines on both sides, i.e. unchanged text
      diffed.concat(Difference.new(piece.split(/^/), piece.split(/^/)))
    end
  end
  diffed
end
108
-
109
# Breaks one classic-format hunk into difference elements.
#
# The header is emitted as unchanged text. A "d" hunk becomes a deletion,
# an "a" hunk an addition; for a "c" hunk the two halves are compared word
# by word and emitted as former part, "---" separator, latter part.
#
# @param a_hunk [String] text of the hunk (header and body)
# @param src_encoding value assigned to each piece's +encoding+
# @param src_eol value assigned to each piece's +eol+
# @return [Array] difference elements
# @raise [RuntimeError] when the header contains none of d, a, c
def anatomize_classic_hunk(a_hunk, src_encoding, src_eol)
  extend ClassicDiff
  # Makes a plain string usable as a CharString with the source's settings.
  as_charstring = lambda do |str|
    str.extend(CharString)
    str.encoding = src_encoding
    str.eol = src_eol
    str
  end

  result = []
  a_hunk.scan(/(#{hunk_header})(#{change}|#{del}+|#{add}+)/) do |raw_head, raw_body|
    head = as_charstring.call(raw_head)
    body = as_charstring.call(raw_body)
    result.concat(Difference.new(head.to_words, head.to_words))

    if /d/ =~ head
      # deletion: the body exists only on the former side
      result.concat(Difference.new(body.to_words, []))
    elsif /a/ =~ head
      # addition: the body exists only on the latter side
      result.concat(Difference.new([], body.to_words))
    elsif /c/ =~ head
      # change (need tweak)
      former, latter = body.split(/#{sep}/).collect { |half| as_charstring.call(half) }
      word_diff = Difference.new(former.to_words, latter.to_words)
      former_part = word_diff.former_only
      latter_part = word_diff.latter_only
      separator = as_charstring.call(/#{sep}/.match(body).to_s)
      separator_part = Difference.new(separator.to_words, separator.to_words)
      result.concat(former_part + separator_part + latter_part)
    else
      raise "invalid hunk header: #{head}"
    end
  end
  result
end
143
-
144
# Regular-expression source fragments describing the context diff format.
# Every method returns a String meant to be interpolated into a Regexp.
module ContextDiff
  # One line terminator, or the end of the input.
  def eol
    "(?:\r\n|\n|\r|\\z)"
  end

  # One character that is not part of a line terminator.
  def noneol
    "(?:[^\r\n])"
  end

  # The row of asterisks that opens a hunk.
  def hunk_header
    "(?:\\*+#{eol})"
  end

  # Sub-header of the former half: "*** 1,4 ****", or "*** 5 ****" when the
  # range is a single line (the ",end" part is then omitted, so it has to
  # be optional here or such hunks are never recognised).
  def hunk_subheader_former
    "(?:^\\*+ [0-9]+(?:,[0-9]+)? \\*+#{eol})"
  end

  # Sub-header of the latter half: "--- 1,4 ----" or "--- 5 ----".
  def hunk_subheader_latter
    "(?:^-+ [0-9]+(?:,[0-9]+)? -+#{eol})"
  end

  # One removed line ("- ...").
  def del
    "(?:^- #{noneol}*?#{eol})"
  end

  # One added line ("+ ...").
  def add
    "(?:^\\+ #{noneol}*?#{eol})"
  end

  # One changed line ("! ...").
  def change
    "(?:^! #{noneol}*?#{eol})"
  end

  # Text that starts with none of "-", "+", "!", "*", up to a terminator.
  def misc
    "(?:^[^-+!*]+?#{eol}+?)"
  end

  # A run of lines of one kind inside a hunk half.
  def any
    "(?:#{del}+|#{add}+|#{change}+|#{misc}+)"
  end

  # File header line: "*** name" or "--- name".
  def file_header
    "(?:[-\\*]{3} #{noneol}+?#{eol})"
  end

  # Everything a context diff is cut into: file headers, whole hunks, and
  # other lines.
  def elements
    "(?:#{file_header}|#{hunk_header}#{hunk_subheader_former}#{any}*?#{hunk_subheader_latter}#{any}+|#{misc}|#{noneol}+#{eol})"
  end
end
182
-
183
# Parses a context-format diff into a flat list of difference elements.
#
# The text is cut into pieces with ContextDiff#elements. A piece that opens
# with a row of ten or more asterisks followed by a "*** " line is a hunk
# and goes to anatomize_context_hunk; any other piece is emitted as text
# that is identical on both sides.
#
# @param src [String] context diff text
# @return [Array] difference elements in input order
def anatomize_context(src)
  extend ContextDiff
  encoding_guess = CharString.guess_encoding(src)
  eol_guess = CharString.guess_eol(src)
  hunk_opening = /\A\*{10,}#{eol}^\*{3} /

  result = []
  src.scan(/#{elements}/m).each do |piece|
    if hunk_opening =~ piece
      result.concat(anatomize_context_hunk(piece, encoding_guess, eol_guess))
      next
    end
    # not a hunk: unchanged text
    piece.extend(CharString)
    piece.encoding = encoding_guess
    piece.eol = eol_guess
    result.concat(Difference.new(piece.to_words, piece.to_words))
  end
  result
end
200
-
201
# Breaks one context-format hunk into difference elements.
#
# The hunk is split into five parts: asterisk header, former sub-header,
# former body, latter sub-header, latter body. The headers are emitted as
# unchanged text; the bodies are analysed by
# anatomize_context_hunk_scanbodies.
#
# @param a_hunk [String] text of the whole hunk
# @param src_encoding value assigned to each part's +encoding+
# @param src_eol value assigned to each part's +eol+
# @return [Array] difference elements
def anatomize_context_hunk(a_hunk, src_encoding, src_eol)
  extend ContextDiff
  layout = /(#{hunk_header})(#{hunk_subheader_former})(.*?)(#{hunk_subheader_latter})(.*?)\z/m
  found = layout.match(a_hunk)
  # Without a match every part stays nil, as before.
  parts = found ? found.captures : []
  parts.each do |part|
    next unless part

    part.extend(CharString)
    part.encoding = src_encoding
    part.eol = src_eol
  end
  header, subheader_former, body_former, subheader_latter, body_latter = parts

  former_elements, latter_elements =
    anatomize_context_hunk_scanbodies(body_former, body_latter, src_encoding, src_eol)

  result = []
  result.concat(Difference.new(header.to_words, header.to_words) +
                Difference.new(subheader_former.to_words, subheader_former.to_words) +
                former_elements +
                Difference.new(subheader_latter.to_words, subheader_latter.to_words) +
                latter_elements)
  result
end
222
-
223
# Analyses the two bodies of a context-format hunk.
#
# Runs of "! " lines are collected from both bodies first. Each body is then
# walked run by run: removed and added runs become one-sided differences,
# other text becomes unchanged text, and the n-th changed run is compared
# word by word with the n-th changed run of the opposite body, keeping only
# the side that belongs to the body being walked.
#
# @param body_f [String, nil] former body (nil is treated as empty)
# @param body_l [String, nil] latter body (nil is treated as empty)
# @param src_encoding value assigned to each piece's +encoding+
# @param src_eol value assigned to each piece's +eol+
# @return [Array<Array>] two lists: elements of the former body, then of
#   the latter body
def anatomize_context_hunk_scanbodies(body_f, body_l, src_encoding, src_eol)
  halves = [body_f.nil? ? '' : body_f, body_l.nil? ? '' : body_l]
  extend ContextDiff

  # Tags a matched piece as a CharString; nil (unmatched group) passes through.
  tag = lambda do |str|
    if str
      str.extend(CharString)
      str.encoding = src_encoding
      str.eol = src_eol
    end
    str
  end

  change_run = /#{change}+/
  change_runs = halves.collect do |half|
    half.scan(change_run).collect { |run| tag.call(run) }
  end

  run_of_any_kind = /(#{del}+)|(#{add}+)|(#{change}+)|(#{misc}+)/m
  result = [[], []]
  halves.each_with_index do |half, side|
    # Each body consumes its own copy of the change runs, front to back.
    pending_former = change_runs[0].dup
    pending_latter = change_runs[1].dup
    half.scan(run_of_any_kind) do |removed, added, changed, unchanged|
      [removed, added, changed, unchanged].each { |piece| tag.call(piece) }
      element =
        if removed
          Difference.new(removed.to_words, [])
        elsif added
          Difference.new([], added.to_words)
        elsif changed
          pair = Difference.new(pending_former.shift.to_words, pending_latter.shift.to_words)
          case side
          when 0 then pair.former_only
          when 1 then pair.latter_only
          else raise
          end
        elsif unchanged
          Difference.new(unchanged.to_words, unchanged.to_words)
        else
          raise "bummers!"
        end
      result[side].concat(element)
    end
  end
  result
end
268
-
269
# Regular-expression source fragments describing the unified diff format.
# Every method returns a String meant to be interpolated into a Regexp.
module UnifiedDiff
  # One line terminator, or the end of the input.
  #
  # The anchor is spelled "\\z" on purpose: inside a double-quoted string
  # "\z" is just the letter "z", which made every "z" in a line count as
  # the end of that line.
  def eol
    "(?:\r\n|\n|\r|\\z)"
  end

  # One character that is not part of a line terminator.
  def noneol
    "(?:[^\r\n])"
  end

  # Hunk header line ("@@ -1,3 +1,4 @@").
  def hunk_header
    "(?:@@ #{noneol}+#{eol})"
  end

  # One removed line ("-...").
  def del
    "(?:^-#{noneol}*?#{eol})"
  end

  # One added line ("+...").
  def add
    "(?:^\\+#{noneol}*?#{eol})"
  end

  # Removed lines directly followed by added lines.
  def change
    "(?:#{del}+#{add}+)"
  end

  # One unchanged line (" ...").
  def common
    "(?:^ #{noneol}*?#{eol})"
  end

  # Text that starts with neither "-" nor "+", up to a terminator.
  def misc
    "(?:^[^-+]+?#{eol}+?)"
  end

  # A run of lines of one kind.
  def any
    "(?:#{del}+|#{add}+|#{change}+|#{common}+|#{misc}+)"
  end

  # A non-diff line followed by the "--- name" / "+++ name" pair.
  def file_header
    "(?:^[^-+@ ]#{noneol}+#{eol}(?:^[-+]{3} #{noneol}+#{eol}){2})"
  end

  # All lines that belong to one hunk: removed, added and unchanged lines.
  # Anything else (the next "@@" header, the next file, ordinary text) ends
  # the hunk.
  # NOTE(review): a "--- name" / "+++ name" pair that directly follows a
  # hunk, with no other line in between, is indistinguishable from hunk
  # lines here and is absorbed into that hunk.
  def hunk_body
    "(?:#{del}|#{add}|#{common})+"
  end

  # Everything a unified diff is cut into: file headers, whole hunks, and
  # other lines.
  #
  # A hunk used to be matched as the header plus a lazily repeated #any,
  # which stops after the first run of lines; the rest of the hunk was then
  # cut into ordinary lines and its changes were never analysed.
  def elements
    "(?:#{file_header}|#{hunk_header}#{hunk_body}|#{misc}|#{noneol}+#{eol})"
  end
end
304
-
305
# Parses a unified-format diff into a flat list of difference elements.
#
# The text is cut into pieces with UnifiedDiff#elements. A piece that opens
# with "@@ " is a hunk and goes to anatomize_unified_hunk; any other piece
# is emitted as text that is identical on both sides.
#
# @param src [String] unified diff text
# @return [Array] difference elements in input order
def anatomize_unified(src)
  extend UnifiedDiff
  encoding_guess = CharString.guess_encoding(src)
  eol_guess = CharString.guess_eol(src)

  result = []
  src.scan(/#{elements}/m).each do |piece|
    if /\A@@ / =~ piece
      result.concat(anatomize_unified_hunk(piece.to_s, encoding_guess, eol_guess))
      next
    end
    # not a hunk: unchanged text
    piece.extend(CharString)
    piece.encoding = encoding_guess
    piece.eol = eol_guess
    result.concat(Difference.new(piece.to_words, piece.to_words))
  end
  result
end
322
-
323
# Breaks one unified-format hunk into difference elements.
#
# The "@@" header is emitted as unchanged text. The body is walked run by
# run: removed lines directly followed by added lines are compared word by
# word and reported as changes, lone removed or added runs become one-sided
# differences, and everything else becomes unchanged text.
#
# Only the capture groups that actually matched are tagged as CharString.
# The alternatives of the body pattern are mutually exclusive, so most
# groups are nil on every match; extending nil and assigning to it was
# never intended (the context-format scanner already skips nil) and fails
# where nil cannot be modified.
#
# @param a_hunk [String] text of the hunk (header and body)
# @param src_encoding value assigned to each piece's +encoding+
# @param src_eol value assigned to each piece's +eol+
# @return [Array] difference elements
# @raise [RuntimeError] on an element tag or match this method does not know
def anatomize_unified_hunk(a_hunk, src_encoding, src_eol)
  extend UnifiedDiff
  # Tags a matched piece as a CharString; nil (unmatched group) passes through.
  tag = lambda do |str|
    if str
      str.extend(CharString)
      str.encoding = src_encoding
      str.eol = src_eol
    end
    str
  end

  diffed = []
  a_hunk.scan(/(#{hunk_header})(#{any}+#{eol}?)/m) do |head, body|
    tag.call(head)
    tag.call(body)
    diffed.concat(Difference.new(head.to_words, head.to_words))
    body.scan(/(#{del}+)(#{add}+)|(#{del}+#{eol}?)|(#{add}+)|(#{common}+#{eol}?)|(.*#{eol}?)/m) do |groups|
      groups.each { |group| tag.call(group) }
      cf, cl, d, a, cmn, msc = groups[0..5]
      if cf && cl
        # removed run followed by added run: word-level comparison, with
        # every differing word reported as (half of) a change
        Difference.new(cf.to_words, cl.to_words).each do |e|
          case e.first
          when :change_elt
            diffed << [:change_elt, e[1], nil]
            diffed << [:change_elt, nil, e[2]]
          when :del_elt then diffed << [:change_elt, e[1], nil]
          when :add_elt then diffed << [:change_elt, nil, e[2]]
          when :common_elt_elt then diffed << e
          else raise "bummers! (#{e.inspect})"
          end
        end
      elsif d
        diffed.concat(Difference.new(d.to_words, []))
      elsif a
        diffed.concat(Difference.new([], a.to_words))
      elsif cmn
        diffed.concat(Difference.new(cmn.to_words, cmn.to_words))
      elsif msc
        diffed.concat(Difference.new(msc.to_words, msc.to_words))
      else
        raise "bummers! (#{groups.inspect})"
      end
    end
  end
  diffed
end
361
-
362
# Command-line entry point: reads all input (files named on the command
# line, or standard input), prints ordinary text unchanged and prints every
# recognised diff fragment through View#to_tty.
if __FILE__ == $0
  input = ARGF.read
  input_encoding = CharString.guess_encoding(input)
  input_eol = CharString.guess_eol(input)

  scan_text_for_diffs(input).each do |fragment|
    if DiffFile.new('').guess_diff_type(fragment) == "unknown"
      # not a diff: pass through verbatim
      print fragment
      next
    end
    elements = DiffFile.new(fragment).anatomize
    print View.new(elements, input_encoding, input_eol).to_tty
  end
end