RubyGems - docdiff - Versions diffs - 0.5.0 → 0.6.2 - Mend

docdiff 0.5.0 → 0.6.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +7 -0
data/.travis.yml +5 -3
data/Gemfile +1 -1
data/Makefile +15 -19
data/Rakefile +45 -10
data/bin/docdiff +25 -13
data/devutil/Rakefile +9 -0
data/devutil/changelog.sh +40 -0
data/docdiff.gemspec +4 -4
data/docdiffwebui.cgi +1 -1
data/langfilter.rb +1 -5
data/lib/doc_diff.rb +5 -1
data/lib/docdiff/charstring.rb +10 -285
data/lib/docdiff/diff/contours.rb +2 -1
data/lib/docdiff/diff/editscript.rb +2 -0
data/lib/docdiff/diff/rcsdiff.rb +2 -0
data/lib/docdiff/diff/shortestpath.rb +2 -0
data/lib/docdiff/diff/speculative.rb +6 -3
data/lib/docdiff/diff/subsequence.rb +2 -0
data/lib/docdiff/diff/unidiff.rb +2 -1
data/lib/docdiff/diff.rb +2 -0
data/lib/docdiff/difference.rb +2 -0
data/lib/docdiff/document.rb +2 -0
data/lib/docdiff/encoding/en_ascii.rb +15 -40
data/lib/docdiff/encoding/ja_eucjp.rb +15 -40
data/lib/docdiff/encoding/ja_sjis.rb +15 -40
data/lib/docdiff/encoding/ja_utf8.rb +15 -40
data/lib/docdiff/version.rb +1 -1
data/lib/docdiff/view.rb +16 -14
data/lib/docdiff.rb +1 -1
data/readme.html +41 -4
data/readme.md +185 -0
data/test/charstring_test.rb +16 -26
data/test/diff_test.rb +2 -1
data/test/difference_test.rb +2 -1
data/test/docdiff_test.rb +12 -3
data/test/document_test.rb +7 -6
data/test/view_test.rb +3 -1
metadata +23 -34
data/devutil/JIS0208.TXT +0 -6952
data/lib/viewdiff.rb +0 -375
data/test/viewdiff_test.rb +0 -908

data/lib/docdiff/charstring.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 # To use, include to String, or extend String.
 # 2003- Hisashi MORITA
+class DocDiff
 module CharString
   Encodings = {}
@@ -72,9 +73,10 @@ module CharString
     # returns 'CR', 'LF', 'CRLF', 'UNKNOWN'(binary),
     # 'NONE'(1-line), or nil
     return nil if string == nil  #=> nil (argument missing)
-    eol_counts = {'CR'   => string.scan(/(\r)(?!\n)/o).size,
-                  'LF'   => string.scan(/(?:\A|[^\r])(\n)/o).size,
-                  'CRLF' => string.scan(/(\r\n)/o).size}
+    bin_string = string.dup.force_encoding("ASCII-8BIT")
+    eol_counts = {'CR'   => bin_string.scan(/(\r)(?!\n)/o).size,
+                  'LF'   => bin_string.scan(/(?:\A|[^\r])(\n)/o).size,
+                  'CRLF' => bin_string.scan(/(\r\n)/o).size}
     eol_counts.delete_if{|eol, count| count == 0}  # Remove missing EOL
     eols = eol_counts.keys
     eol_variety = eols.size  # numbers of flavors found
@@ -87,10 +89,6 @@ module CharString
     end
   end
-  def CharString.ruby_m17n?
-    "".respond_to?(:force_encoding)
-  end
   # Note that some languages (like Japanese) do not have 'word' or 'phrase',
   # thus some of the following methods are not 'linguistically correct'.
@@ -128,7 +126,6 @@ module CharString
     }.compact.size
   end
-if ruby_m17n?
   # for Ruby-1.9
   def encoding()
     String.new(self).encoding.to_s
@@ -234,10 +231,11 @@ if ruby_m17n?
   end
   def count_graph_line()
+    graph = (Encodings['UTF-8']::GRAPH +
+             Encodings['UTF-8']::JA_GRAPH).chars.uniq.join
+    re_graph = Regexp.new("[#{Regexp.quote(graph)}]", Regexp::MULTILINE)
     split_to_line.collect{|line|
-      line if Regexp.new("[#{Encodings['UTF-8']::GRAPH}" +
-                         "#{Encodings['UTF-8']::JA_GRAPH}]",
-                         Regexp::MULTILINE).match line.encode('UTF-8')
+      line if re_graph.match line.encode('UTF-8')
     }.compact.size
   end
@@ -254,280 +252,6 @@ if ruby_m17n?
   require 'docdiff/encoding/ja_eucjp'
   require 'docdiff/encoding/ja_sjis'
   require 'docdiff/encoding/ja_utf8'
-else
-  # for Ruby-1.8
-  require 'iconv'
-  def encoding()
-    @encoding
-#     if @encoding
-#       @encoding
-#     else
-#       @encoding = CharString.guess_encoding(self)
-#       # raise "encoding is not set.\n"
-#     end
-  end
-  def encoding=(cs)
-    @encoding = cs
-    extend Encodings[@encoding]  # ; p "Hey, I extended #{Encodings[@encoding]}!"
-  end
-  # returns nil, 'US-ASCII', 'JIS', 'EUC-JP', 'Shift_JIS', 'UTF-8', or 'UNKNOWN'
-  def CharString.guess_encoding(string)
-    return nil if string == nil
-    result_using_pureruby = CharString.guess_encoding_using_pureruby(string)
-    result_using_iconv    = CharString.guess_encoding_using_iconv(string)
-    if result_using_pureruby == result_using_iconv
-      result_using_pureruby
-    else
-      "UNKNOWN"
-    end
-  end
-  # returns nil, 'US-ASCII', 'JIS', 'EUC-JP', 'Shift_JIS', 'UTF-8', or 'UNKNOWN'
-  def CharString.guess_encoding_using_pureruby(string)
-    return nil if string == nil
-    ascii_pat = '[\x00-\x7f]'
-    jis_pat   = ['(?:(?:\x1b\x28\x42)',
-                 '|(?:\x1b\x28\x4a)',
-                 '|(?:\x1b\x28\x49)',
-                 '|(?:\x1b\x24\x40)',
-                 '|(?:\x1b\x24\x42)',
-                 '|(?:\x1b\x24\x44))'].join
-    eucjp_pat = ['(?:(?:[\x00-\x1f\x7f])',
-                 '|(?:[\x20-\x7e])',
-                 '|(?:\x8e[\xa1-\xdf])',
-                 '|(?:[\xa1-\xfe][\xa1-\xfe])',
-                 '|(?:\x8f[\xa1-\xfe][\xa1-\xfe]))'].join
-    sjis_pat  = ['(?:(?:[\x00-\x1f\x7f])',
-                 '|(?:[\x20-\x7e])',
-                 '|(?:[\xa1-\xdf])',
-                 '|(?:[\x81-\x9f][\x40-\x7e])',
-                 '|(?:[\xe0-\xef][\x80-\xfc]))'].join
-    utf8_pat  = ['(?:(?:[\x00-\x7f])',
-                 '|(?:[\xc0-\xdf][\x80-\xbf])',
-                 '|(?:[\xe0-\xef][\x80-\xbf][\x80-\xbf])',
-                 '|(?:[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]))'].join
-    ascii_match_length = string.scan(/#{ascii_pat}/on).join.length
-    jis_escseq_count   = string.scan(/#{jis_pat}/on).size
-    eucjp_match_length = string.scan(/#{eucjp_pat}/no).join.length
-    sjis_match_length  = string.scan(/#{sjis_pat}/no).join.length
-    utf8_match_length  = string.scan(/#{utf8_pat}/no).join.length
-    case
-    when 0 < jis_escseq_count                 # JIS escape sequense found
-      guessed_encoding = 'JIS'
-    when ascii_match_length == string.length  # every char is ASCII (but not JIS)
-      guessed_encoding = 'US-ASCII'
-    else
-      case
-      when eucjp_match_length < (string.length / 2) &&
-           sjis_match_length  < (string.length / 2) &&
-           utf8_match_length  < (string.length / 2)
-        guessed_encoding = 'UNKNOWN'  # either encoding did not match long enough
-      when (eucjp_match_length < utf8_match_length) &&
-           (sjis_match_length < utf8_match_length)
-        guessed_encoding = 'UTF-8'
-      when (eucjp_match_length < sjis_match_length) &&
-           (utf8_match_length < sjis_match_length)
-        guessed_encoding = 'Shift_JIS'
-      when (sjis_match_length < eucjp_match_length) &&
-           (utf8_match_length < eucjp_match_length)
-        guessed_encoding = 'EUC-JP'
-      else
-        guessed_encoding = 'UNKNOWN'  # cannot guess at all
-      end
-    end
-    return guessed_encoding
-  end
-  def CharString.guess_encoding_using_iconv(string)
-    valid_as_utf8   = CharString.valid_as("utf-8", string)
-    valid_as_sjis   = CharString.valid_as("cp932", string) # not sjis, but cp932
-    valid_as_jis    = CharString.valid_as("iso-2022-jp", string)
-    valid_as_eucjp  = CharString.valid_as("eucjp", string)
-    valid_as_ascii  = CharString.valid_as("ascii", string)
-    invalid_as_utf8   = CharString.invalid_as("utf-8", string)
-    invalid_as_sjis   = CharString.invalid_as("cp932", string) # not sjis, but cp932
-    invalid_as_jis    = CharString.invalid_as("iso-2022-jp", string)
-    invalid_as_eucjp  = CharString.invalid_as("eucjp", string)
-    invalid_as_ascii  = CharString.invalid_as("ascii", string)
-    case
-    when string == nil
-      nil
-    when valid_as_ascii
-      "US-ASCII"
-    when valid_as_jis  # Iconv sometimes recognizes JIS for ASCII, ignoring JIS escape sequence.
-      "JIS"
-    when valid_as_eucjp
-      "EUC-JP"
-    when valid_as_sjis && invalid_as_utf8 && invalid_as_eucjp && invalid_as_jis
-      "Shift_JIS"
-    when valid_as_utf8 && invalid_as_sjis && invalid_as_eucjp && invalid_as_jis
-      "UTF-8"
-    else
-      "UNKNOWN"
-    end
-  end
-  def CharString.valid_as(encoding_name, string)
-    begin
-      Iconv.iconv(encoding_name, encoding_name, string)
-    rescue Iconv::IllegalSequence, Iconv::InvalidCharacter, Iconv::OutOfRange
-      return false
-    else
-      return true
-    end
-  end
-  def CharString.invalid_as(encoding_name, string)
-    if CharString.valid_as(encoding_name, string)
-      false
-    else
-      true
-    end
-  end
-  def split_to_byte()
-    scan(/./nm)
-  end
-  def split_to_char()
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    if eol_char  # sometimes string has no end-of-line char
-      scan(Regexp.new("(?:#{eol_char})|(?:.)",
-                      Regexp::MULTILINE,
-                      encoding.sub(/ASCII/i, 'none'))
-      )
-    else                  # it seems that no EOL module was extended...
-      scan(Regexp.new("(?:.)",
-                      Regexp::MULTILINE,
-                      encoding.sub(/ASCII/i, 'none'))
-      )
-    end
-  end
-  def count_latin_graph_char()
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    scan(Regexp.new("[#{Encodings[encoding]::GRAPH}]",
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    ).size
-  end
-  def count_ja_graph_char()
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    scan(Regexp.new("[#{Encodings[encoding]::JA_GRAPH}]",
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    ).size
-  end
-  def count_latin_blank_char()
-    scan(Regexp.new("[#{Encodings[encoding]::BLANK}]",
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    ).size
-  end
-  def count_ja_blank_char()
-    scan(Regexp.new("[#{Encodings[encoding]::JA_BLANK}]",
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    ).size
-  end
-  def split_to_word()
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    scan(Regexp.new(Encodings[encoding]::WORD_REGEXP_SRC,
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    )
-  end
-  def count_latin_word()
-    split_to_word.collect{|word|
-      word if Regexp.new("[#{Encodings[encoding]::PRINT}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/i, 'none')).match word
-    }.compact.size
-  end
-  def count_ja_word()
-    split_to_word.collect{|word|
-      word if Regexp.new("[#{Encodings[encoding]::JA_PRINT}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/i, 'none')).match word
-    }.compact.size
-  end
-  def count_latin_valid_word()
-    split_to_word.collect{|word|
-      word if Regexp.new("[#{Encodings[encoding]::ALNUM}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/i, 'none')).match word
-    }.compact.size
-  end
-  def count_ja_valid_word()
-    split_to_word.collect{|word|
-      word if Regexp.new("[#{Encodings[encoding]::JA_GRAPH}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/i, 'none')).match word
-    }.compact.size
-  end
-  def split_to_line()
-#     scan(Regexp.new(".*?#{eol_char}|.+",
-#                     Regexp::MULTILINE,
-#                     encoding.sub(/ASCII/i, 'none'))
-#     )
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    if defined? eol_char
-      scan(Regexp.new(".*?#{eol_char}|.+",
-                      Regexp::MULTILINE,
-                      encoding.sub(/ASCII/i, 'none'))
-      )
-    else
-      scan(Regexp.new(".+",
-                      Regexp::MULTILINE,
-                      encoding.sub(/ASCII/i, 'none'))
-      )
-    end
-  end
-  def count_graph_line()
-    split_to_line.collect{|line|
-      line if Regexp.new("[#{Encodings[encoding]::GRAPH}" +
-                         "#{Encodings[encoding]::JA_GRAPH}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/, 'none')).match line
-    }.compact.size
-  end
-  def count_blank_line()
-    split_to_line.collect{|line|
-      line if Regexp.new("^[#{Encodings[encoding]::BLANK}" +
-                         "#{Encodings[encoding]::JA_BLANK}]+(?:#{eol_char})?",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/, 'none')).match line
-    }.compact.size
-  end
-  # load encoding modules
-  require 'docdiff/encoding/en_ascii'
-  require 'docdiff/encoding/ja_eucjp'
-  require 'docdiff/encoding/ja_sjis'
-  require 'docdiff/encoding/ja_utf8'
-end # end ruby_m17n?
   alias to_bytes split_to_byte
   alias to_chars split_to_char
   alias to_words split_to_word
@@ -573,6 +297,7 @@ end # end ruby_m17n?
   end
 end  # module CharString
+end  # class DocDiff
 # class String
 #   include CharString

data/lib/docdiff/diff/contours.rb CHANGED Viewed

@@ -46,6 +46,7 @@ Also in Nordic Journal of Computing (NJC), Vol. 2, No. 4, Winter 1995, 444 - 461
 http://web.informatik.uni-bonn.de/IV/Mitarbeiter/rick/lcs.dvi.Z
 =end
+class DocDiff
 class Diff
   class Contours
     def initialize(a, b)
@@ -379,4 +380,4 @@ class Diff
     end
   end
 end
+end  # class DocDiff

data/lib/docdiff/diff/editscript.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 require 'docdiff/diff/rcsdiff'
 require 'docdiff/diff/unidiff'
+class DocDiff
 class Diff
   class EditScript
     def initialize
@@ -146,3 +147,4 @@ class Diff
     end
   end
 end
+end  # class DocDiff

data/lib/docdiff/diff/rcsdiff.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+class DocDiff
 class Diff
   def Diff.rcsdiff(a, b)
     al = []
@@ -105,3 +106,4 @@ class Diff
     end
   end
 end
+end  # class DocDiff

data/lib/docdiff/diff/shortestpath.rb CHANGED Viewed

@@ -6,6 +6,7 @@ An O(NP) Sequence Comparison Algorithm,
 Information Processing Letters 35, 1990, 317-323
 =end
+class DocDiff
 class Diff
   class ShortestPath
     def initialize(a, b)
@@ -91,3 +92,4 @@ class Diff
     end
   end
 end
+end  # class DocDiff

data/lib/docdiff/diff/speculative.rb CHANGED Viewed

@@ -2,6 +2,7 @@ require 'docdiff/diff/shortestpath'
 require 'docdiff/diff/contours'
 require 'thread'
+class DocDiff
 class Diff
   class Speculative
     def initialize(a, b)
@@ -14,21 +15,22 @@ class Diff
       result = nil
       tg = ThreadGroup.new
+      m = Mutex.new
       # Since ShortestPath is faster than Contours if two sequences are very similar,
       # try it first.
       tg.add(Thread.new {
 	#print "ShortestPath start.\n"
 	result = ShortestPath.new(@a, @b).lcs
-	Thread.exclusive {tg.list.each {|t| t.kill if t != Thread.current}}
+	m.synchronize {tg.list.each {|t| t.kill if t != Thread.current}}
 	#print "ShortestPath win.\n"
       })
-      # start Contours unless ShortestPath is already ended with first quantum,
+      # start Contours unless ShortestPath is already ended with first quantum,
       tg.add(Thread.new {
 	#print "Contours start.\n"
 	result = Contours.new(@a, @b).lcs
-	Thread.exclusive {tg.list.each {|t| t.kill if t != Thread.current}}
+	m.synchronize {tg.list.each {|t| t.kill if t != Thread.current}}
 	#print "Contours win.\n"
       }) unless tg.list.empty?
@@ -38,3 +40,4 @@ class Diff
     end
   end
 end
+end  # class DocDiff

data/lib/docdiff/diff/subsequence.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+class DocDiff
 class Diff
   class Subsequence
     def initialize
@@ -37,3 +38,4 @@ class Diff
     end
   end
 end
+end  # class DocDiff

data/lib/docdiff/diff/unidiff.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+class DocDiff
 class Diff
   def Diff.unidiff(a, b, algorithm=nil)
     al = []
@@ -19,7 +20,6 @@ class Diff
     end
     def unidiff(out='', context_lines=3)
-      state = :common
       l1 = l2 = 1
       hunk = []
       hunk_l1 = hunk_l2 = 1
@@ -122,3 +122,4 @@ class Diff
     end
   end
 end
+end  # class DocDiff

data/lib/docdiff/diff.rb CHANGED Viewed

@@ -50,6 +50,7 @@ So, reduced input has following properties:
 * Any elemnt in B is also exist in A.
 =end
+class DocDiff
 class Diff
   def initialize(a, b)
     @original_a = a
@@ -215,3 +216,4 @@ class Diff
     end
   end
 end
+end  # class DocDiff

data/lib/docdiff/difference.rb CHANGED Viewed

@@ -4,6 +4,7 @@
 require 'docdiff/diff'
+class DocDiff
 class Difference < Array
 #  @resolution = nil # char, word, phrase, sentence, line, paragraph..
@@ -90,3 +91,4 @@ class Difference < Array
   end
 end  # class Difference
+end  # class DocDiff

data/lib/docdiff/document.rb CHANGED Viewed

@@ -8,6 +8,7 @@ end
 class EOLDetectionFailure < Exception
 end
+class DocDiff
 class Document
   def initialize(str, enc = nil, e = nil)
@@ -125,3 +126,4 @@ class Document
   end
 end  # class Document
+end  # class DocDiff

data/lib/docdiff/encoding/en_ascii.rb CHANGED Viewed

@@ -1,6 +1,9 @@
 # English ASCII encoding module for CharString
 # 2003- Hisashi MORITA
+# frozen_string_literal: false
+class DocDiff
 module CharString
   module ASCII
@@ -13,50 +16,21 @@ module CharString
     SPACE =     "\x09\x0a\x0b\x0c\x0d\x20"
     BLANK =     "\x09\x20"
     DIGIT =     "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
-    ALPHA =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
-                "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
-                "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
-                "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
-                "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
-                "\x79\x7a"
-    ALNUM =     "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
-                "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
+    UPPER =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
                 "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
-                "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
-                "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
-                "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
-                "\x79\x7a"
+                "\x55\x56\x57\x58\x59\x5a"
+    LOWER =     "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
+                "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
+                "\x75\x76\x77\x78\x79\x7a"
+    ALPHA =     UPPER + LOWER
+    ALNUM =     DIGIT + ALPHA
     PUNCT =     "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
                 "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
                 "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
                 "\x7d\x7e"
-    LOWER =     "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
-                "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
-                "\x75\x76\x77\x78\x79\x7a"
-    UPPER =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
-                "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
-                "\x55\x56\x57\x58\x59\x5a"
-    PRINT =     "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29" \
-                "\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33" \
-                "\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d" \
-                "\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47" \
-                "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51" \
-                "\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b" \
-                "\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65" \
-                "\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
-                "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79" \
-                "\x7a\x7b\x7c\x7d\x7e"
-    GRAPH =     "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
-                "\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34" \
-                "\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e" \
-                "\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48" \
-                "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52" \
-                "\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c" \
-                "\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66" \
-                "\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" \
-                "\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a" \
-                "\x7b\x7c\x7d\x7e"
-    XDIGIT =    "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
+    GRAPH =     DIGIT + UPPER + LOWER + PUNCT
+    PRINT =     "\x20" + GRAPH
+    XDIGIT =    DIGIT +
                 "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
                 "\x65\x66"
@@ -94,4 +68,5 @@ module CharString
     CharString.register_encoding(self)
   end  # module ASCII
-end
+end  # module CharString
+end  # class DocDiff

data/lib/docdiff/encoding/ja_eucjp.rb CHANGED Viewed

@@ -1,6 +1,9 @@
 # Japanese EUC-JP encoding module for CharString
 # 2003- Hisashi MORITA
+# frozen_string_literal: false
+class DocDiff
 module CharString
   module EUC_JP
@@ -16,50 +19,21 @@ module CharString
     SPACE =     "\x09\x0a\x0b\x0c\x0d\x20"
     BLANK =     "\x09\x20"
     DIGIT =     "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
-    ALPHA =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
-                "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
-                "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
-                "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
-                "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
-                "\x79\x7a"
-    ALNUM =     "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
-                "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
+    UPPER =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
                 "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
-                "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
-                "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
-                "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
-                "\x79\x7a"
+                "\x55\x56\x57\x58\x59\x5a"
+    LOWER =     "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
+                "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
+                "\x75\x76\x77\x78\x79\x7a"
+    ALPHA =     UPPER + LOWER
+    ALNUM =     DIGIT + ALPHA
     PUNCT =     "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
                 "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
                 "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
                 "\x7d\x7e"
-    LOWER =     "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
-                "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
-                "\x75\x76\x77\x78\x79\x7a"
-    UPPER =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
-                "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
-                "\x55\x56\x57\x58\x59\x5a"
-    PRINT =     "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29" \
-                "\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33" \
-                "\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d" \
-                "\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47" \
-                "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51" \
-                "\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b" \
-                "\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65" \
-                "\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
-                "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79" \
-                "\x7a\x7b\x7c\x7d\x7e"
-    GRAPH =     "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
-                "\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34" \
-                "\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e" \
-                "\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48" \
-                "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52" \
-                "\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c" \
-                "\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66" \
-                "\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" \
-                "\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a" \
-                "\x7b\x7c\x7d\x7e"
-    XDIGIT =    "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
+    GRAPH =     DIGIT + UPPER + LOWER + PUNCT
+    PRINT =     "\x20" + GRAPH
+    XDIGIT =    DIGIT +
                 "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
                 "\x65\x66"
     JA_SPACE =  "\xa1\xa1"
@@ -266,4 +240,5 @@ module CharString
     CharString.register_encoding(self)
   end  # module EUCJP
-end
+end  # module CharString
+end  # class DocDiff