RubyGems - docdiff - Versions diffs - 0.6.4 → 0.6.6 - Mend

docdiff 0.6.4 → 0.6.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (47) hide show

checksums.yaml +4 -4
data/Gemfile +7 -7
data/Guardfile +4 -4
data/Makefile +6 -7
data/README.md +1 -0
data/README_ja.md +1 -0
data/Rakefile +6 -6
data/bin/docdiff +2 -209
data/devutil/Rakefile +12 -5
data/devutil/char_by_charclass.rb +43 -20
data/devutil/charclass_by_char.rb +40 -19
data/devutil/jis0208.rb +263 -231
data/devutil/jis0208_test.rb +196 -0
data/doc/news.md +17 -0
data/docdiff.gemspec +13 -10
data/lib/doc_diff.rb +63 -98
data/lib/docdiff/charstring.rb +225 -241
data/lib/docdiff/cli.rb +316 -0
data/lib/docdiff/diff/contours.rb +1 -1
data/lib/docdiff/diff/editscript.rb +1 -1
data/lib/docdiff/diff/rcsdiff.rb +1 -1
data/lib/docdiff/diff/shortestpath.rb +1 -1
data/lib/docdiff/diff/speculative.rb +1 -1
data/lib/docdiff/diff/subsequence.rb +1 -1
data/lib/docdiff/diff/unidiff.rb +1 -1
data/lib/docdiff/diff.rb +1 -1
data/lib/docdiff/difference.rb +71 -70
data/lib/docdiff/document.rb +129 -109
data/lib/docdiff/encoding/en_ascii.rb +64 -58
data/lib/docdiff/encoding/ja_eucjp.rb +250 -235
data/lib/docdiff/encoding/ja_sjis.rb +240 -226
data/lib/docdiff/encoding/ja_utf8.rb +6952 -6939
data/lib/docdiff/version.rb +1 -1
data/lib/docdiff/view.rb +523 -427
data/lib/docdiff.rb +2 -2
data/test/charstring_test.rb +475 -351
data/test/cli_test.rb +314 -0
data/test/diff_test.rb +15 -16
data/test/difference_test.rb +40 -31
data/test/docdiff_test.rb +162 -159
data/test/document_test.rb +280 -175
data/test/fixture/format_wdiff.conf +1 -0
data/test/fixture/simple.conf +9 -0
data/test/test_helper.rb +2 -1
data/test/view_test.rb +636 -497
metadata +27 -9
data/devutil/testjis0208.rb +0 -38

data/lib/docdiff/document.rb CHANGED Viewed

@@ -1,129 +1,149 @@
 # Document class, a part of DocDiff
 # 2004-01-14.. Hisashi MORITA
-require 'docdiff/charstring'
+require "docdiff/charstring"
-class EncodingDetectionFailure < Exception
+class EncodingDetectionFailure < StandardError
 end
-class EOLDetectionFailure < Exception
+class EOLDetectionFailure < StandardError
 end
 class DocDiff
-class Document
-  def initialize(str, enc = nil, e = nil)
-    @body = str
-    @body.extend CharString
-    if enc
-      @body.encoding = enc
-    elsif !@body.encoding
-      guessed_encoding = CharString.guess_encoding(str)
-      if guessed_encoding == "UNKNOWN"
-        raise EncodingDetectionFailure, "encoding not specified, and auto detection failed."
+  class Document
+    def initialize(str, enc = nil, e = nil)
+      @body = str
+      @body.extend(CharString)
+      if enc
+        @body.encoding = enc
+      elsif !@body.encoding
+        guessed_encoding = CharString.guess_encoding(str)
+        if guessed_encoding == "UNKNOWN"
+          raise EncodingDetectionFailure, "encoding not specified, and auto detection failed."
         # @body.encoding = 'ASCII' # default to ASCII <= BAD!
-      else
-	@body.encoding = guessed_encoding
+        else
+          @body.encoding = guessed_encoding
+        end
       end
-    end
-    if e
-      @body.eol = e
-    else
-      guessed_eol = CharString.guess_eol(str)
-      if guessed_eol == "UNKNOWN"
-        raise EOLDetectionFailure, "eol not specified, and auto detection failed."
-        # @body.eol = 'LF' # default to LF
+      if e
+        @body.eol = e
       else
-        @body.eol = guessed_eol
+        guessed_eol = CharString.guess_eol(str)
+        if guessed_eol == "UNKNOWN"
+          raise EOLDetectionFailure, "eol not specified, and auto detection failed."
+        # @body.eol = 'LF' # default to LF
+        else
+          @body.eol = guessed_eol
+        end
       end
     end
-  end
-  def encoding()
-    @body.encoding
-  end
-  def encoding=(cs)
-    @body.encoding = cs
-  end
-  def eol()
-    @body.eol
-  end
-  def eol=(eolstr)
-    @body.eol = eolstr
-  end
-  def split_to_line()
-    @body.split_to_line
-  end
-  def split_to_word()
-    @body.split_to_word
-  end
-  def split_to_char()
-    @body.split_to_char
-  end
-  def split_to_byte()
-    @body.split_to_byte
-  end
+    def encoding
+      @body.encoding
+    end
-  def count_line()
-    @body.count_line
-  end
-  def count_blank_line()
-    @body.count_blank_line
-  end
-  def count_empty_line()
-    @body.count_empty_line
-  end
-  def count_graph_line()
-    @body.count_graph_line
-  end
+    def encoding=(cs)
+      @body.encoding = cs
+    end
-  def count_word()
-    @body.count_word
-  end
-  def count_latin_word()
-    @body.count_latin_word
-  end
-  def count_ja_word()
-    @body.count_ja_word
-  end
-  def count_valid_word()
-    @body.count_valid_word
-  end
-  def count_latin_valid_word()
-    @body.count_latin_valid_word
-  end
-  def count_ja_valid_word()
-    @body.count_ja_valid_word
-  end
+    def eol
+      @body.eol
+    end
-  def count_char()
-    @body.count_char
-  end
-  def count_blank_char()
-    @body.count_blank_char
-  end
-  def count_graph_char()
-    @body.count_graph_char
-  end
-  def count_latin_blank_char()
-    @body.count_latin_blank_char
-  end
-  def count_latin_graph_char()
-    @body.count_latin_graph_char
-  end
-  def count_ja_blank_char()
-    @body.count_ja_blank_char
-  end
-  def count_ja_graph_char()
-    @body.count_ja_graph_char
-  end
+    def eol=(eolstr)
+      @body.eol = eolstr
+    end
-  def count_byte()
-    @body.count_byte
-  end
+    def split_to_line
+      @body.split_to_line
+    end
-  def eol_char()
-    @body.eol_char
-  end
+    def split_to_word
+      @body.split_to_word
+    end
+    def split_to_char
+      @body.split_to_char
+    end
-end  # class Document
-end  # class DocDiff
+    def split_to_byte
+      @body.split_to_byte
+    end
+    def count_line
+      @body.count_line
+    end
+    def count_blank_line
+      @body.count_blank_line
+    end
+    def count_empty_line
+      @body.count_empty_line
+    end
+    def count_graph_line
+      @body.count_graph_line
+    end
+    def count_word
+      @body.count_word
+    end
+    def count_latin_word
+      @body.count_latin_word
+    end
+    def count_ja_word
+      @body.count_ja_word
+    end
+    def count_valid_word
+      @body.count_valid_word
+    end
+    def count_latin_valid_word
+      @body.count_latin_valid_word
+    end
+    def count_ja_valid_word
+      @body.count_ja_valid_word
+    end
+    def count_char
+      @body.count_char
+    end
+    def count_blank_char
+      @body.count_blank_char
+    end
+    def count_graph_char
+      @body.count_graph_char
+    end
+    def count_latin_blank_char
+      @body.count_latin_blank_char
+    end
+    def count_latin_graph_char
+      @body.count_latin_graph_char
+    end
+    def count_ja_blank_char
+      @body.count_ja_blank_char
+    end
+    def count_ja_graph_char
+      @body.count_ja_graph_char
+    end
+    def count_byte
+      @body.count_byte
+    end
+    def eol_char
+      @body.eol_char
+    end
+  end
+end

data/lib/docdiff/encoding/en_ascii.rb CHANGED Viewed

@@ -1,72 +1,78 @@
 # English ASCII encoding module for CharString
 # 2003- Hisashi MORITA
-# frozen_string_literal: false
+# frozen_string_literal: true
 class DocDiff
-module CharString
-  module ASCII
+  module CharString
+    module ASCII
+      ENCODING = "US-ASCII"
-    Encoding = "US-ASCII"
+      CNTRL =
+        "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" \
+          "\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13" \
+          "\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d" \
+          "\x1e\x1f\x7f"
+      SPACE =
+        "\x09\x0a\x0b\x0c\x0d\x20"
+      BLANK =
+        "\x09\x20"
+      DIGIT =
+        "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
+      UPPER =
+        "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
+          "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
+          "\x55\x56\x57\x58\x59\x5a"
+      LOWER =
+        "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
+          "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
+          "\x75\x76\x77\x78\x79\x7a"
+      ALPHA = UPPER + LOWER
+      ALNUM = DIGIT + ALPHA
+      PUNCT =
+        Regexp.quote(
+          "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
+            "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
+            "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
+            "\x7d\x7e",
+        )
+      GRAPH = DIGIT + UPPER + LOWER + PUNCT
+      PRINT = "\x20" + GRAPH
+      XDIGIT =
+        DIGIT +
+        "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
+          "\x65\x66"
-    CNTRL =     "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" \
-                "\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13" \
-                "\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d" \
-                "\x1e\x1f\x7f"
-    SPACE =     "\x09\x0a\x0b\x0c\x0d\x20"
-    BLANK =     "\x09\x20"
-    DIGIT =     "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
-    UPPER =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
-                "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
-                "\x55\x56\x57\x58\x59\x5a"
-    LOWER =     "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
-                "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
-                "\x75\x76\x77\x78\x79\x7a"
-    ALPHA =     UPPER + LOWER
-    ALNUM =     DIGIT + ALPHA
-    PUNCT =     "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
-                "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
-                "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
-                "\x7d\x7e"
-    GRAPH =     DIGIT + UPPER + LOWER + PUNCT
-    PRINT =     "\x20" + GRAPH
-    XDIGIT =    DIGIT +
-                "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
-                "\x65\x66"
+      JA_BLANK = "" # kludge...
+      JA_GRAPH = "" # kludge...
-    JA_BLANK =  "" # kludge...
-    JA_GRAPH =  "" # kludge...
+      WORD_REGEXP_SRC = [
+        "(?:[#{GRAPH}]+[#{BLANK}]?)",
+        "|(?:[#{SPACE}]+)",
+        "|(?:.+?)",
+      ].join
-    PUNCT.replace(Regexp.quote(PUNCT)) # kludge to avoid warning "character class has `[' without escape"
-    PRINT.replace(Regexp.quote(PRINT)) # kludge to avoid warning "character class has `[' without escape"
-    GRAPH.replace(Regexp.quote(GRAPH)) # kludge to avoid warning "character class has `[' without escape"
+      # override default method, as ASCII has no Japanese in it
+      def count_ja_graph_char
+        0
+      end
-    WORD_REGEXP_SRC = ["(?:[#{GRAPH}]+[#{BLANK}]?)",
-                       "|(?:[#{SPACE}]+)",
-                       "|(?:.+?)"].join
+      # override default method, as ASCII has no Japanese in it
+      def count_ja_blank_char
+        0
+      end
-    # override default method, as ASCII has no Japanese in it
-    def count_ja_graph_char()
-      0
-    end
-    # override default method, as ASCII has no Japanese in it
-    def count_ja_blank_char()
-      0
-    end
+      # override default method, as ASCII has no Japanese in it
+      def count_ja_word
+        0
+      end
-    # override default method, as ASCII has no Japanese in it
-    def count_ja_word()
-      0
-    end
+      # override default method, as ASCII has no Japanese in it
+      def count_ja_valid_word
+        0
+      end
-    # override default method, as ASCII has no Japanese in it
-    def count_ja_valid_word()
-      0
+      CharString.register_encoding(self)
     end
-    CharString.register_encoding(self)
-  end  # module ASCII
-end  # module CharString
-end  # class DocDiff
+  end
+end