docdiff 0.5.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@
3
3
  # To use, include to String, or extend String.
4
4
  # 2003- Hisashi MORITA
5
5
 
6
+ class DocDiff
6
7
  module CharString
7
8
 
8
9
  Encodings = {}
@@ -72,9 +73,10 @@ module CharString
72
73
  # returns 'CR', 'LF', 'CRLF', 'UNKNOWN'(binary),
73
74
  # 'NONE'(1-line), or nil
74
75
  return nil if string == nil #=> nil (argument missing)
75
- eol_counts = {'CR' => string.scan(/(\r)(?!\n)/o).size,
76
- 'LF' => string.scan(/(?:\A|[^\r])(\n)/o).size,
77
- 'CRLF' => string.scan(/(\r\n)/o).size}
76
+ bin_string = string.dup.force_encoding("ASCII-8BIT")
77
+ eol_counts = {'CR' => bin_string.scan(/(\r)(?!\n)/o).size,
78
+ 'LF' => bin_string.scan(/(?:\A|[^\r])(\n)/o).size,
79
+ 'CRLF' => bin_string.scan(/(\r\n)/o).size}
78
80
  eol_counts.delete_if{|eol, count| count == 0} # Remove missing EOL
79
81
  eols = eol_counts.keys
80
82
  eol_variety = eols.size # numbers of flavors found
@@ -87,10 +89,6 @@ module CharString
87
89
  end
88
90
  end
89
91
 
90
- def CharString.ruby_m17n?
91
- "".respond_to?(:force_encoding)
92
- end
93
-
94
92
  # Note that some languages (like Japanese) do not have 'word' or 'phrase',
95
93
  # thus some of the following methods are not 'linguistically correct'.
96
94
 
@@ -128,7 +126,6 @@ module CharString
128
126
  }.compact.size
129
127
  end
130
128
 
131
- if ruby_m17n?
132
129
  # for Ruby-1.9
133
130
  def encoding()
134
131
  String.new(self).encoding.to_s
@@ -234,10 +231,11 @@ if ruby_m17n?
234
231
  end
235
232
 
236
233
  def count_graph_line()
234
+ graph = (Encodings['UTF-8']::GRAPH +
235
+ Encodings['UTF-8']::JA_GRAPH).chars.uniq.join
236
+ re_graph = Regexp.new("[#{Regexp.quote(graph)}]", Regexp::MULTILINE)
237
237
  split_to_line.collect{|line|
238
- line if Regexp.new("[#{Encodings['UTF-8']::GRAPH}" +
239
- "#{Encodings['UTF-8']::JA_GRAPH}]",
240
- Regexp::MULTILINE).match line.encode('UTF-8')
238
+ line if re_graph.match line.encode('UTF-8')
241
239
  }.compact.size
242
240
  end
243
241
 
@@ -254,280 +252,6 @@ if ruby_m17n?
254
252
  require 'docdiff/encoding/ja_eucjp'
255
253
  require 'docdiff/encoding/ja_sjis'
256
254
  require 'docdiff/encoding/ja_utf8'
257
- else
258
- # for Ruby-1.8
259
- require 'iconv'
260
-
261
- def encoding()
262
- @encoding
263
- # if @encoding
264
- # @encoding
265
- # else
266
- # @encoding = CharString.guess_encoding(self)
267
- # # raise "encoding is not set.\n"
268
- # end
269
- end
270
-
271
- def encoding=(cs)
272
- @encoding = cs
273
- extend Encodings[@encoding] # ; p "Hey, I extended #{Encodings[@encoding]}!"
274
- end
275
-
276
- # returns nil, 'US-ASCII', 'JIS', 'EUC-JP', 'Shift_JIS', 'UTF-8', or 'UNKNOWN'
277
- def CharString.guess_encoding(string)
278
- return nil if string == nil
279
- result_using_pureruby = CharString.guess_encoding_using_pureruby(string)
280
- result_using_iconv = CharString.guess_encoding_using_iconv(string)
281
- if result_using_pureruby == result_using_iconv
282
- result_using_pureruby
283
- else
284
- "UNKNOWN"
285
- end
286
- end
287
-
288
- # returns nil, 'US-ASCII', 'JIS', 'EUC-JP', 'Shift_JIS', 'UTF-8', or 'UNKNOWN'
289
- def CharString.guess_encoding_using_pureruby(string)
290
- return nil if string == nil
291
-
292
- ascii_pat = '[\x00-\x7f]'
293
- jis_pat = ['(?:(?:\x1b\x28\x42)',
294
- '|(?:\x1b\x28\x4a)',
295
- '|(?:\x1b\x28\x49)',
296
- '|(?:\x1b\x24\x40)',
297
- '|(?:\x1b\x24\x42)',
298
- '|(?:\x1b\x24\x44))'].join
299
- eucjp_pat = ['(?:(?:[\x00-\x1f\x7f])',
300
- '|(?:[\x20-\x7e])',
301
- '|(?:\x8e[\xa1-\xdf])',
302
- '|(?:[\xa1-\xfe][\xa1-\xfe])',
303
- '|(?:\x8f[\xa1-\xfe][\xa1-\xfe]))'].join
304
- sjis_pat = ['(?:(?:[\x00-\x1f\x7f])',
305
- '|(?:[\x20-\x7e])',
306
- '|(?:[\xa1-\xdf])',
307
- '|(?:[\x81-\x9f][\x40-\x7e])',
308
- '|(?:[\xe0-\xef][\x80-\xfc]))'].join
309
- utf8_pat = ['(?:(?:[\x00-\x7f])',
310
- '|(?:[\xc0-\xdf][\x80-\xbf])',
311
- '|(?:[\xe0-\xef][\x80-\xbf][\x80-\xbf])',
312
- '|(?:[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]))'].join
313
-
314
- ascii_match_length = string.scan(/#{ascii_pat}/on).join.length
315
- jis_escseq_count = string.scan(/#{jis_pat}/on).size
316
- eucjp_match_length = string.scan(/#{eucjp_pat}/no).join.length
317
- sjis_match_length = string.scan(/#{sjis_pat}/no).join.length
318
- utf8_match_length = string.scan(/#{utf8_pat}/no).join.length
319
-
320
- case
321
- when 0 < jis_escseq_count # JIS escape sequense found
322
- guessed_encoding = 'JIS'
323
- when ascii_match_length == string.length # every char is ASCII (but not JIS)
324
- guessed_encoding = 'US-ASCII'
325
- else
326
- case
327
- when eucjp_match_length < (string.length / 2) &&
328
- sjis_match_length < (string.length / 2) &&
329
- utf8_match_length < (string.length / 2)
330
- guessed_encoding = 'UNKNOWN' # either encoding did not match long enough
331
- when (eucjp_match_length < utf8_match_length) &&
332
- (sjis_match_length < utf8_match_length)
333
- guessed_encoding = 'UTF-8'
334
- when (eucjp_match_length < sjis_match_length) &&
335
- (utf8_match_length < sjis_match_length)
336
- guessed_encoding = 'Shift_JIS'
337
- when (sjis_match_length < eucjp_match_length) &&
338
- (utf8_match_length < eucjp_match_length)
339
- guessed_encoding = 'EUC-JP'
340
- else
341
- guessed_encoding = 'UNKNOWN' # cannot guess at all
342
- end
343
- end
344
- return guessed_encoding
345
- end
346
-
347
- def CharString.guess_encoding_using_iconv(string)
348
- valid_as_utf8 = CharString.valid_as("utf-8", string)
349
- valid_as_sjis = CharString.valid_as("cp932", string) # not sjis, but cp932
350
- valid_as_jis = CharString.valid_as("iso-2022-jp", string)
351
- valid_as_eucjp = CharString.valid_as("eucjp", string)
352
- valid_as_ascii = CharString.valid_as("ascii", string)
353
- invalid_as_utf8 = CharString.invalid_as("utf-8", string)
354
- invalid_as_sjis = CharString.invalid_as("cp932", string) # not sjis, but cp932
355
- invalid_as_jis = CharString.invalid_as("iso-2022-jp", string)
356
- invalid_as_eucjp = CharString.invalid_as("eucjp", string)
357
- invalid_as_ascii = CharString.invalid_as("ascii", string)
358
- case
359
- when string == nil
360
- nil
361
- when valid_as_ascii
362
- "US-ASCII"
363
- when valid_as_jis # Iconv sometimes recognizes JIS for ASCII, ignoring JIS escape sequence.
364
- "JIS"
365
- when valid_as_eucjp
366
- "EUC-JP"
367
- when valid_as_sjis && invalid_as_utf8 && invalid_as_eucjp && invalid_as_jis
368
- "Shift_JIS"
369
- when valid_as_utf8 && invalid_as_sjis && invalid_as_eucjp && invalid_as_jis
370
- "UTF-8"
371
- else
372
- "UNKNOWN"
373
- end
374
- end
375
-
376
- def CharString.valid_as(encoding_name, string)
377
- begin
378
- Iconv.iconv(encoding_name, encoding_name, string)
379
- rescue Iconv::IllegalSequence, Iconv::InvalidCharacter, Iconv::OutOfRange
380
- return false
381
- else
382
- return true
383
- end
384
- end
385
-
386
- def CharString.invalid_as(encoding_name, string)
387
- if CharString.valid_as(encoding_name, string)
388
- false
389
- else
390
- true
391
- end
392
- end
393
-
394
- def split_to_byte()
395
- scan(/./nm)
396
- end
397
-
398
- def split_to_char()
399
- raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
400
- # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
401
- if eol_char # sometimes string has no end-of-line char
402
- scan(Regexp.new("(?:#{eol_char})|(?:.)",
403
- Regexp::MULTILINE,
404
- encoding.sub(/ASCII/i, 'none'))
405
- )
406
- else # it seems that no EOL module was extended...
407
- scan(Regexp.new("(?:.)",
408
- Regexp::MULTILINE,
409
- encoding.sub(/ASCII/i, 'none'))
410
- )
411
- end
412
- end
413
-
414
- def count_latin_graph_char()
415
- raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
416
- # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
417
- scan(Regexp.new("[#{Encodings[encoding]::GRAPH}]",
418
- Regexp::MULTILINE,
419
- encoding.sub(/ASCII/i, 'none'))
420
- ).size
421
- end
422
-
423
- def count_ja_graph_char()
424
- raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
425
- # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
426
- scan(Regexp.new("[#{Encodings[encoding]::JA_GRAPH}]",
427
- Regexp::MULTILINE,
428
- encoding.sub(/ASCII/i, 'none'))
429
- ).size
430
- end
431
-
432
- def count_latin_blank_char()
433
- scan(Regexp.new("[#{Encodings[encoding]::BLANK}]",
434
- Regexp::MULTILINE,
435
- encoding.sub(/ASCII/i, 'none'))
436
- ).size
437
- end
438
-
439
- def count_ja_blank_char()
440
- scan(Regexp.new("[#{Encodings[encoding]::JA_BLANK}]",
441
- Regexp::MULTILINE,
442
- encoding.sub(/ASCII/i, 'none'))
443
- ).size
444
- end
445
-
446
- def split_to_word()
447
- raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
448
- # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
449
- scan(Regexp.new(Encodings[encoding]::WORD_REGEXP_SRC,
450
- Regexp::MULTILINE,
451
- encoding.sub(/ASCII/i, 'none'))
452
- )
453
- end
454
-
455
- def count_latin_word()
456
- split_to_word.collect{|word|
457
- word if Regexp.new("[#{Encodings[encoding]::PRINT}]",
458
- Regexp::MULTILINE,
459
- encoding.sub(/ASCII/i, 'none')).match word
460
- }.compact.size
461
- end
462
-
463
- def count_ja_word()
464
- split_to_word.collect{|word|
465
- word if Regexp.new("[#{Encodings[encoding]::JA_PRINT}]",
466
- Regexp::MULTILINE,
467
- encoding.sub(/ASCII/i, 'none')).match word
468
- }.compact.size
469
- end
470
-
471
- def count_latin_valid_word()
472
- split_to_word.collect{|word|
473
- word if Regexp.new("[#{Encodings[encoding]::ALNUM}]",
474
- Regexp::MULTILINE,
475
- encoding.sub(/ASCII/i, 'none')).match word
476
- }.compact.size
477
- end
478
-
479
- def count_ja_valid_word()
480
- split_to_word.collect{|word|
481
- word if Regexp.new("[#{Encodings[encoding]::JA_GRAPH}]",
482
- Regexp::MULTILINE,
483
- encoding.sub(/ASCII/i, 'none')).match word
484
- }.compact.size
485
- end
486
-
487
- def split_to_line()
488
- # scan(Regexp.new(".*?#{eol_char}|.+",
489
- # Regexp::MULTILINE,
490
- # encoding.sub(/ASCII/i, 'none'))
491
- # )
492
- raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
493
- raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
494
- if defined? eol_char
495
- scan(Regexp.new(".*?#{eol_char}|.+",
496
- Regexp::MULTILINE,
497
- encoding.sub(/ASCII/i, 'none'))
498
- )
499
- else
500
- scan(Regexp.new(".+",
501
- Regexp::MULTILINE,
502
- encoding.sub(/ASCII/i, 'none'))
503
- )
504
- end
505
- end
506
-
507
- def count_graph_line()
508
- split_to_line.collect{|line|
509
- line if Regexp.new("[#{Encodings[encoding]::GRAPH}" +
510
- "#{Encodings[encoding]::JA_GRAPH}]",
511
- Regexp::MULTILINE,
512
- encoding.sub(/ASCII/, 'none')).match line
513
- }.compact.size
514
- end
515
-
516
- def count_blank_line()
517
- split_to_line.collect{|line|
518
- line if Regexp.new("^[#{Encodings[encoding]::BLANK}" +
519
- "#{Encodings[encoding]::JA_BLANK}]+(?:#{eol_char})?",
520
- Regexp::MULTILINE,
521
- encoding.sub(/ASCII/, 'none')).match line
522
- }.compact.size
523
- end
524
-
525
- # load encoding modules
526
- require 'docdiff/encoding/en_ascii'
527
- require 'docdiff/encoding/ja_eucjp'
528
- require 'docdiff/encoding/ja_sjis'
529
- require 'docdiff/encoding/ja_utf8'
530
- end # end ruby_m17n?
531
255
  alias to_bytes split_to_byte
532
256
  alias to_chars split_to_char
533
257
  alias to_words split_to_word
@@ -573,6 +297,7 @@ end # end ruby_m17n?
573
297
  end
574
298
 
575
299
  end # module CharString
300
+ end # class DocDiff
576
301
 
577
302
  # class String
578
303
  # include CharString
@@ -46,6 +46,7 @@ Also in Nordic Journal of Computing (NJC), Vol. 2, No. 4, Winter 1995, 444 - 461
46
46
  http://web.informatik.uni-bonn.de/IV/Mitarbeiter/rick/lcs.dvi.Z
47
47
  =end
48
48
 
49
+ class DocDiff
49
50
  class Diff
50
51
  class Contours
51
52
  def initialize(a, b)
@@ -379,4 +380,4 @@ class Diff
379
380
  end
380
381
  end
381
382
  end
382
-
383
+ end # class DocDiff
@@ -1,6 +1,7 @@
1
1
  require 'docdiff/diff/rcsdiff'
2
2
  require 'docdiff/diff/unidiff'
3
3
 
4
+ class DocDiff
4
5
  class Diff
5
6
  class EditScript
6
7
  def initialize
@@ -146,3 +147,4 @@ class Diff
146
147
  end
147
148
  end
148
149
  end
150
+ end # class DocDiff
@@ -1,3 +1,4 @@
1
+ class DocDiff
1
2
  class Diff
2
3
  def Diff.rcsdiff(a, b)
3
4
  al = []
@@ -105,3 +106,4 @@ class Diff
105
106
  end
106
107
  end
107
108
  end
109
+ end # class DocDiff
@@ -6,6 +6,7 @@ An O(NP) Sequence Comparison Algorithm,
6
6
  Information Processing Letters 35, 1990, 317-323
7
7
  =end
8
8
 
9
+ class DocDiff
9
10
  class Diff
10
11
  class ShortestPath
11
12
  def initialize(a, b)
@@ -91,3 +92,4 @@ class Diff
91
92
  end
92
93
  end
93
94
  end
95
+ end # class DocDiff
@@ -2,6 +2,7 @@ require 'docdiff/diff/shortestpath'
2
2
  require 'docdiff/diff/contours'
3
3
  require 'thread'
4
4
 
5
+ class DocDiff
5
6
  class Diff
6
7
  class Speculative
7
8
  def initialize(a, b)
@@ -14,21 +15,22 @@ class Diff
14
15
  result = nil
15
16
 
16
17
  tg = ThreadGroup.new
18
+ m = Mutex.new
17
19
 
18
20
  # Since ShortestPath is faster than Contours if two sequences are very similar,
19
21
  # try it first.
20
22
  tg.add(Thread.new {
21
23
  #print "ShortestPath start.\n"
22
24
  result = ShortestPath.new(@a, @b).lcs
23
- Thread.exclusive {tg.list.each {|t| t.kill if t != Thread.current}}
25
+ m.synchronize {tg.list.each {|t| t.kill if t != Thread.current}}
24
26
  #print "ShortestPath win.\n"
25
27
  })
26
28
 
27
- # start Contours unless ShortestPath is already ended with first quantum,
29
+ # start Contours unless ShortestPath is already ended with first quantum,
28
30
  tg.add(Thread.new {
29
31
  #print "Contours start.\n"
30
32
  result = Contours.new(@a, @b).lcs
31
- Thread.exclusive {tg.list.each {|t| t.kill if t != Thread.current}}
33
+ m.synchronize {tg.list.each {|t| t.kill if t != Thread.current}}
32
34
  #print "Contours win.\n"
33
35
  }) unless tg.list.empty?
34
36
 
@@ -38,3 +40,4 @@ class Diff
38
40
  end
39
41
  end
40
42
  end
43
+ end # class DocDiff
@@ -1,3 +1,4 @@
1
+ class DocDiff
1
2
  class Diff
2
3
  class Subsequence
3
4
  def initialize
@@ -37,3 +38,4 @@ class Diff
37
38
  end
38
39
  end
39
40
  end
41
+ end # class DocDiff
@@ -1,3 +1,4 @@
1
+ class DocDiff
1
2
  class Diff
2
3
  def Diff.unidiff(a, b, algorithm=nil)
3
4
  al = []
@@ -19,7 +20,6 @@ class Diff
19
20
  end
20
21
 
21
22
  def unidiff(out='', context_lines=3)
22
- state = :common
23
23
  l1 = l2 = 1
24
24
  hunk = []
25
25
  hunk_l1 = hunk_l2 = 1
@@ -122,3 +122,4 @@ class Diff
122
122
  end
123
123
  end
124
124
  end
125
+ end # class DocDiff
data/lib/docdiff/diff.rb CHANGED
@@ -50,6 +50,7 @@ So, reduced input has following properties:
50
50
  * Any elemnt in B is also exist in A.
51
51
 
52
52
  =end
53
+ class DocDiff
53
54
  class Diff
54
55
  def initialize(a, b)
55
56
  @original_a = a
@@ -215,3 +216,4 @@ class Diff
215
216
  end
216
217
  end
217
218
  end
219
+ end # class DocDiff
@@ -4,6 +4,7 @@
4
4
 
5
5
  require 'docdiff/diff'
6
6
 
7
+ class DocDiff
7
8
  class Difference < Array
8
9
 
9
10
  # @resolution = nil # char, word, phrase, sentence, line, paragraph..
@@ -90,3 +91,4 @@ class Difference < Array
90
91
  end
91
92
 
92
93
  end # class Difference
94
+ end # class DocDiff
@@ -8,6 +8,7 @@ end
8
8
  class EOLDetectionFailure < Exception
9
9
  end
10
10
 
11
+ class DocDiff
11
12
  class Document
12
13
 
13
14
  def initialize(str, enc = nil, e = nil)
@@ -125,3 +126,4 @@ class Document
125
126
  end
126
127
 
127
128
  end # class Document
129
+ end # class DocDiff
@@ -1,6 +1,9 @@
1
1
  # English ASCII encoding module for CharString
2
2
  # 2003- Hisashi MORITA
3
3
 
4
+ # frozen_string_literal: false
5
+
6
+ class DocDiff
4
7
  module CharString
5
8
  module ASCII
6
9
 
@@ -13,50 +16,21 @@ module CharString
13
16
  SPACE = "\x09\x0a\x0b\x0c\x0d\x20"
14
17
  BLANK = "\x09\x20"
15
18
  DIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
16
- ALPHA = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
17
- "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
18
- "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
19
- "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
20
- "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
21
- "\x79\x7a"
22
- ALNUM = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
23
- "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
19
+ UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
24
20
  "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
25
- "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
26
- "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
27
- "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
28
- "\x79\x7a"
21
+ "\x55\x56\x57\x58\x59\x5a"
22
+ LOWER = "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
23
+ "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
24
+ "\x75\x76\x77\x78\x79\x7a"
25
+ ALPHA = UPPER + LOWER
26
+ ALNUM = DIGIT + ALPHA
29
27
  PUNCT = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
30
28
  "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
31
29
  "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
32
30
  "\x7d\x7e"
33
- LOWER = "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
34
- "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
35
- "\x75\x76\x77\x78\x79\x7a"
36
- UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
37
- "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
38
- "\x55\x56\x57\x58\x59\x5a"
39
- PRINT = "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29" \
40
- "\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33" \
41
- "\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d" \
42
- "\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47" \
43
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51" \
44
- "\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b" \
45
- "\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65" \
46
- "\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
47
- "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79" \
48
- "\x7a\x7b\x7c\x7d\x7e"
49
- GRAPH = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
50
- "\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34" \
51
- "\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e" \
52
- "\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48" \
53
- "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52" \
54
- "\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c" \
55
- "\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66" \
56
- "\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" \
57
- "\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a" \
58
- "\x7b\x7c\x7d\x7e"
59
- XDIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
31
+ GRAPH = DIGIT + UPPER + LOWER + PUNCT
32
+ PRINT = "\x20" + GRAPH
33
+ XDIGIT = DIGIT +
60
34
  "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
61
35
  "\x65\x66"
62
36
 
@@ -94,4 +68,5 @@ module CharString
94
68
  CharString.register_encoding(self)
95
69
 
96
70
  end # module ASCII
97
- end
71
+ end # module CharString
72
+ end # class DocDiff
@@ -1,6 +1,9 @@
1
1
  # Japanese EUC-JP encoding module for CharString
2
2
  # 2003- Hisashi MORITA
3
3
 
4
+ # frozen_string_literal: false
5
+
6
+ class DocDiff
4
7
  module CharString
5
8
  module EUC_JP
6
9
 
@@ -16,50 +19,21 @@ module CharString
16
19
  SPACE = "\x09\x0a\x0b\x0c\x0d\x20"
17
20
  BLANK = "\x09\x20"
18
21
  DIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
19
- ALPHA = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
20
- "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
21
- "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
22
- "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
23
- "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
24
- "\x79\x7a"
25
- ALNUM = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
26
- "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
22
+ UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
27
23
  "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
28
- "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
29
- "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
30
- "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
31
- "\x79\x7a"
24
+ "\x55\x56\x57\x58\x59\x5a"
25
+ LOWER = "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
26
+ "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
27
+ "\x75\x76\x77\x78\x79\x7a"
28
+ ALPHA = UPPER + LOWER
29
+ ALNUM = DIGIT + ALPHA
32
30
  PUNCT = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
33
31
  "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
34
32
  "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
35
33
  "\x7d\x7e"
36
- LOWER = "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
37
- "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
38
- "\x75\x76\x77\x78\x79\x7a"
39
- UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
40
- "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
41
- "\x55\x56\x57\x58\x59\x5a"
42
- PRINT = "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29" \
43
- "\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33" \
44
- "\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d" \
45
- "\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47" \
46
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51" \
47
- "\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b" \
48
- "\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65" \
49
- "\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
50
- "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79" \
51
- "\x7a\x7b\x7c\x7d\x7e"
52
- GRAPH = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
53
- "\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34" \
54
- "\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e" \
55
- "\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48" \
56
- "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52" \
57
- "\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c" \
58
- "\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66" \
59
- "\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" \
60
- "\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a" \
61
- "\x7b\x7c\x7d\x7e"
62
- XDIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
34
+ GRAPH = DIGIT + UPPER + LOWER + PUNCT
35
+ PRINT = "\x20" + GRAPH
36
+ XDIGIT = DIGIT +
63
37
  "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
64
38
  "\x65\x66"
65
39
  JA_SPACE = "\xa1\xa1"
@@ -266,4 +240,5 @@ module CharString
266
240
  CharString.register_encoding(self)
267
241
 
268
242
  end # module EUCJP
269
- end
243
+ end # module CharString
244
+ end # class DocDiff