RubyGems - docdiff - Versions diffs - 0.5.0 → 0.6.1 - Mend

docdiff 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

checksums.yaml +7 -0
data/.travis.yml +5 -3
data/Gemfile +1 -1
data/Makefile +15 -19
data/Rakefile +45 -10
data/bin/docdiff +9 -11
data/devutil/Rakefile +9 -0
data/devutil/changelog.sh +40 -0
data/docdiff.gemspec +4 -4
data/docdiffwebui.cgi +1 -1
data/langfilter.rb +1 -5
data/lib/doc_diff.rb +5 -1
data/lib/docdiff.rb +1 -1
data/lib/docdiff/charstring.rb +6 -282
data/lib/docdiff/diff.rb +2 -0
data/lib/docdiff/diff/contours.rb +2 -1
data/lib/docdiff/diff/editscript.rb +2 -0
data/lib/docdiff/diff/rcsdiff.rb +2 -0
data/lib/docdiff/diff/shortestpath.rb +2 -0
data/lib/docdiff/diff/speculative.rb +6 -3
data/lib/docdiff/diff/subsequence.rb +2 -0
data/lib/docdiff/diff/unidiff.rb +2 -0
data/lib/docdiff/difference.rb +2 -0
data/lib/docdiff/document.rb +2 -0
data/lib/docdiff/encoding/en_ascii.rb +3 -1
data/lib/docdiff/encoding/ja_eucjp.rb +3 -1
data/lib/docdiff/encoding/ja_sjis.rb +3 -1
data/lib/docdiff/encoding/ja_utf8.rb +3 -1
data/lib/docdiff/version.rb +1 -1
data/lib/docdiff/view.rb +4 -10
data/lib/viewdiff.rb +9 -5
data/readme.html +23 -3
data/readme.md +184 -0
data/test/charstring_test.rb +13 -26
data/test/diff_test.rb +2 -1
data/test/difference_test.rb +2 -1
data/test/docdiff_test.rb +9 -2
data/test/document_test.rb +4 -6
data/test/view_test.rb +3 -1
data/test/viewdiff_test.rb +14 -11
metadata +23 -29
data/devutil/JIS0208.TXT +0 -6952

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: f8733587f13d662ca5c5fda60843298ef71ef798284d9f677d03378d8b5e0e29
+  data.tar.gz: f92804fdb17576aaded799010553a0d6685dbb8679e53cbf6cf15c4773af8ddf
+SHA512:
+  metadata.gz: 94af3213ec734c2b80ad72f70bb1d1312d14da7d15e77237f1824ade5d83f39d666bf8049dcb4c6a2898b402ab80b64c22184bb93359f23a8496f83d0e191f0b
+  data.tar.gz: dfd7a6f65ff88a556b5ccb5612daac005b3405cb0f36f64cc5f4914a061bdbd20c4ac4ab5610c657b9036f09485a18e912e527bc5c78b276d2a494bfc977455a

data/.travis.yml CHANGED Viewed

@@ -1,7 +1,9 @@
 rvm:
-  - 1.8.7
-  - 1.9.2
-  - 1.9.3
+  - 2.0
+  - 2.1
+  - 2.2
+  - 2.3
+  - 2.4
   - ruby-head
 script: rake test

data/Gemfile CHANGED Viewed

@@ -1,4 +1,4 @@
-source :rubygems
+source 'https://rubygems.org'
 group :darwin do
   gem 'rb-fsevent'

data/Makefile CHANGED Viewed

@@ -1,19 +1,21 @@
+# Warning: this Makefile is obsolete, use Rakefile instead
 PRODUCT = docdiff
-VERSION = 0.4.0
+VERSION = $(shell $(RUBY) -r./lib/docdiff/version.rb -e 'Docdiff::VERSION.display')
 RUBY = ruby
 TAR_XVCS = tar --exclude=.svn --exclude=.git
 DOCS   = ChangeLog readme.en.html readme.ja.html \
          index.en.html index.ja.html
 DOCSRC = readme.html index.html img sample
-TESTS  = testcharstring.rb testdiff.rb testdifference.rb \
-         testdocdiff.rb testdocument.rb testview.rb
-DIST   = Makefile devutil docdiff docdiff.conf.example docdiff.rb \
+TESTS  = test/*_test.rb
+DIST   = Makefile devutil lib docdiff.conf.example bin/docdiff \
          docdiff.gemspec \
          docdiffwebui.html docdiffwebui.cgi \
          $(DOCSRC) $(DOCS) $(TESTS)
-TESTLOGS = testdocdiff.log testcharstring.log testdocument.log \
-         testdiff.log testdifference.log testview.log testviewdiff.log
+TESTLOGS = $(foreach t,\
+                     $(wildcard test/*_test.rb),\
+                     $(t:test/%_test.rb=%_test.log)) \
 WWWUSER = hisashim,docdiff
 WWWSITE = web.sourceforge.net
@@ -28,22 +30,16 @@ all:	$(DOCS)
 testall:
 	$(MAKE) test RUBY=ruby1.9.1
-	$(MAKE) test RUBY=ruby1.8
 test: $(TESTLOGS)
-test%.log:
-	$(RUBY) -I. test/test$*.rb | tee $@
+%_test.log:
+	$(RUBY) -I./lib test/$*_test.rb | tee $@
 docs:	$(DOCS)
 ChangeLog:
-# For real ChangeLog style, try http://arthurdejong.org/svn2cl/
-	if [ -d .svn ] ; then \
-	  svn log -rHEAD:0 -v > ChangeLog ; \
-	else \
-	  git svn log > ChangeLog ; \
-	fi
+	devutil/changelog.sh > $@
 readme.%.html: readme.html
 	$(RUBY) -Ku langfilter.rb --$* $< > $@
@@ -54,13 +50,13 @@ install: $(DIST)
 	@if [ ! -d $(DESTDIR)$(PREFIX)/bin ]; then \
 	  mkdir -p $(DESTDIR)$(PREFIX)/bin; \
 	fi
-	cp -Ppv docdiff.rb $(DESTDIR)$(PREFIX)/bin/docdiff
+	cp -Ppv bin/docdiff $(DESTDIR)$(PREFIX)/bin/
 	chmod +x $(DESTDIR)$(PREFIX)/bin/docdiff
-	@if [ ! -d $(datadir)$(PRODUCT) ]; then \
-	  mkdir -p $(datadir)$(PRODUCT); \
+	@if [ ! -d $(datadir)/$(PRODUCT) ]; then \
+	  mkdir -p $(datadir)/$(PRODUCT); \
 	fi
-	($(TAR_XVCS) -cf - docdiff) | (cd $(datadir)$(PRODUCT) && tar -xpf -)
+	(cd lib && $(TAR_XVCS) -cf - *) | (cd $(datadir)/$(PRODUCT) && tar -xpf -)
 	@if [ ! -d $(DESTDIR)/etc/$(PRODUCT) ]; then \
 	  mkdir -p $(DESTDIR)/etc/$(PRODUCT); \

data/Rakefile CHANGED Viewed

@@ -1,17 +1,52 @@
+require 'rake/clean'
+require 'rake/testtask'
 require 'bundler/gem_tasks'
-require 'rake/testtask'
+RUBY    = ENV['RUBY'] ||= 'ruby'
+DOCS   = FileList['ChangeLog', 'readme.en.html', 'readme.ja.html',
+                  'index.en.html', 'index.ja.html']
+DOCSRC = FileList['readme.html', 'index.html', 'img', 'sample']
+TESTS  = FileList['test/*_test.rb']
+TESTLOGS = Dir.glob('test/*_test.rb').map{|f|
+  File.basename(f).ext('log')
+}
+WWWUSER     = ENV['WWWUSER']     ||= 'hisashim,docdiff'
+WWWSITE     = ENV['WWWSITE']     ||= 'web.sourceforge.net'
+WWWSITEPATH = ENV['WWWSITEPATH'] ||= 'htdocs/'
+WWWDRYRUN   = ENV['WWWDRYRUN']   ||= '--dry-run'
 Rake::TestTask.new do |t|
-  t.test_files = FileList["test/test*.rb"]
+  t.test_files = TESTS
   t.verbose = true
 end
-if RUBY_VERSION < '1.9'
-  require 'rcov/rcovtask'
-  Rcov::RcovTask.new do |t|
-    t.test_files = FileList['test/test*.rb']
-    t.output_dir = 'coverage'
-    t.rcov_opts = ["--exclude /gems/*"]
-    t.verbose = true
-  end
+task :default => :test
+desc "generate documents"
+task :docs => DOCS
+file 'ChangeLog' do |t|
+  sh "devutil/changelog.sh > #{t.name}"
 end
+rule(/.*\.(?:en|ja)\.html/ => proc{|tn| tn.gsub(/\.(?:en|ja)/, '')}) do |t|
+  sh "#{RUBY} -E UTF-8 langfilter.rb" +
+    " --#{t.name.gsub(/.*?\.(en|ja)\.html/){$1}}" +
+    " #{t.prerequisites.first} > #{t.name}"
+end
+desc "force to rsync web contents"
+task :wwwupload do |t|
+  sh "rake www WWWDRYRUN="
+end
+desc "rsync web contents"
+task :www => DOCSRC + DOCS do |t|
+  sh "rsync #{WWWDRYRUN} -auv -e ssh --delete" +
+    " --exclude='.svn' --exclude='.git'" +
+    t.prerequisites.join(' ') +
+    " #{WWWUSER}@#{WWWSITE}:#{WWWSITEPATH}"
+end
+CLEAN.include(DOCS, TESTLOGS)

data/bin/docdiff CHANGED Viewed

@@ -1,11 +1,8 @@
 #!/usr/bin/env ruby
 # DocDiff: word/character-oriented text comparison utility
 # Copyright (C) 2002-2011 Hisashi MORITA
-# Requirements: Ruby (>= 1.8)
+# Requirements: Ruby (>= 2.0)
 require 'docdiff'
-require 'docdiff/difference'
-require 'docdiff/document'
-require 'docdiff/view'
 require 'optparse'
 # do_config_stuff
@@ -40,7 +37,7 @@ ARGV.options {|o|
   o.def_option('--char', 'set resolution to char'){clo[:resolution] = "char"}
   o.def_option('--encoding=ENCODING',
-    possible_encodings = ['ASCII','EUC-JP','Shift_JIS','UTF-8','auto'],
+    possible_encodings = ['ASCII','EUC-JP','Shift_JIS','CP932','UTF-8','auto'],
     'specify character encoding',
     possible_encodings.join('|'), "(default is auto. try ASCII for single byte encodings such as ISO-8859-X)"
     ){|s| clo[:encoding] = (s || "auto")}
@@ -48,6 +45,7 @@ ARGV.options {|o|
   o.def_option('--iso8859x', 'same as --encoding=ASCII'){clo[:encoding] = "ASCII"}
   o.def_option('--eucjp', 'same as --encoding=EUC-JP'){clo[:encoding] = "EUC-JP"}
   o.def_option('--sjis', 'same as --encoding=Shift_JIS'){clo[:encoding] = "Shift_JIS"}
+  o.def_option('--cp932', 'same as --encoding=CP932'){clo[:encoding] = "CP932"}
   o.def_option('--utf8', 'same as --encoding=UTF-8'){clo[:encoding] = "UTF-8"}
   o.def_option('--eol=EOL',
@@ -145,8 +143,8 @@ eol1 = docdiff.config[:eol]
 eol2 = docdiff.config[:eol]
 if docdiff.config[:encoding] == "auto"
-  encoding1 = CharString.guess_encoding(file1_content)
-  encoding2 = CharString.guess_encoding(file2_content)
+  encoding1 = DocDiff::CharString.guess_encoding(file1_content)
+  encoding2 = DocDiff::CharString.guess_encoding(file2_content)
   case
   when (encoding1 == "UNKNOWN" or encoding2 == "UNKNOWN")
     raise "Document encoding unknown (#{encoding1}, #{encoding2})."
@@ -156,8 +154,8 @@ if docdiff.config[:encoding] == "auto"
 end
 if docdiff.config[:eol] == "auto"
-  eol1 = CharString.guess_eol(file1_content)
-  eol2 = CharString.guess_eol(file2_content)
+  eol1 = DocDiff::CharString.guess_eol(file1_content)
+  eol2 = DocDiff::CharString.guess_eol(file2_content)
   case
   when (eol1.nil? or eol2.nil?)
     raise "Document eol is nil (#{eol1.inspect}, #{eol2.inspect}).  The document might be empty."
@@ -168,8 +166,8 @@ if docdiff.config[:eol] == "auto"
   end
 end
-doc1 = Document.new(file1_content, encoding1, eol1)
-doc2 = Document.new(file2_content, encoding2, eol2)
+doc1 = DocDiff::Document.new(file1_content, encoding1, eol1)
+doc2 = DocDiff::Document.new(file2_content, encoding2, eol2)
 output = docdiff.run(doc1, doc2,
   {:resolution => docdiff.config[:resolution],

data/devutil/Rakefile ADDED Viewed

@@ -0,0 +1,9 @@
+require 'rake/clean'
+file 'JIS0208.TXT' do |t|
+  sh 'curl -O ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT'
+end
+task :default => 'JIS0208.TXT'
+CLOBBER.include('JIS0208.TXT')

data/devutil/changelog.sh ADDED Viewed

@@ -0,0 +1,40 @@
+#!/bin/sh
+# ChangeLog Generator
+# Copyright 2011 Hisashi Morita
+# License: Public Domain
+#
+# Usage:
+#   changelog.sh [WORKING_DIR] > ChangeLog
+if [ "$1" ]; then
+  WD="$1"
+else
+  WD="."
+fi
+# Subversion
+which svn >/dev/null
+if [ x"$?" = x0 ]; then
+  (svn info "${WD}" >/dev/null 2>&1) && SVN=TRUE
+  if [ x"${SVN}" = xTRUE ]; then
+    (cd "${WD}"; svn log -rBASE:0 -v)
+  fi
+fi
+# Git
+which git >/dev/null
+if [ x"$?" = x0 ]; then
+  (cd "${WD}" && git status --porcelain >/dev/null 2>&1) && GIT=TRUE
+  if [ x"${GIT}" = xTRUE ]; then
+    (cd "${WD}"; git log | cat)
+  fi
+fi
+# Mercurial
+which hg >/dev/null
+if [ x"$?" = x0 ]; then
+  (hg status "${WD}" >/dev/null 2>&1) && HG=TRUE
+  if [ x"${HG}" = xTRUE ]; then
+    (cd "${WD}"; hg log --rev tip:0)
+  fi
+fi

data/docdiff.gemspec CHANGED Viewed

@@ -1,20 +1,20 @@
 # -*- encoding: utf-8 -*-
-$:.push File.expand_path("../lib", __FILE__)
+$:.unshift File.expand_path("../lib", __FILE__)
 require "docdiff/version"
 Gem::Specification.new do |s|
   s.name        = "docdiff"
   s.version     = Docdiff::VERSION
+  s.license     = "BSD-3-Clause"
   s.authors     = ["Hisashi Morita"]
-  s.email       = ["hisashim at users.sourceforge.net"]
-  s.homepage    = "http://docdiff.sourceforge.net"
+  s.email       = ["hisashim at workbook.org"]
+  s.homepage    = "https://github.com/hisashim/docdiff"
   s.summary     = %q{Word-by-word diff}
   s.description = %q{DocDiff compares two text files and shows the
                      difference. It can compare files word by word,
                      character by character, or line by line. It has
                      several output formats such as HTML, tty, Manued,
                      or user-defined markup.}
-  s.rubyforge_project = "docdiff"
   s.files         = `git ls-files`.split("\n")
   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")

data/docdiffwebui.cgi CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/ruby
 # DocDiff Web UI (CGI)
 # 2005-10-08.. Hisashi Morita
-# requirement: Ruby 1.8+ (for timeout.rb)
+# requirement: Ruby 2.0+ (for timeout.rb)
 require 'cgi'
 require 'tempfile'

data/langfilter.rb CHANGED Viewed

@@ -2,13 +2,9 @@
 # language filter
 # usage: langfilter.rb --en <infile >outfile
-def ruby_m17n?
-  return true if "".respond_to? :encoding
-end
 lang_to_include = ARGV.shift.gsub(/-+/, "")
 lang_to_exclude = {"en"=>"ja", "ja"=>"en"}[lang_to_include]
 re = /<([a-z]+) +(?:(?:lang|title)="#{lang_to_exclude}").*?>.*?<\/\1>[\r\n]?/m
-ARGF.set_encoding("UTF-8") if ruby_m17n?
+ARGF.set_encoding("UTF-8")
 ARGF.read.gsub(re, "").display

data/lib/doc_diff.rb CHANGED Viewed

@@ -1,6 +1,10 @@
 # DocDiff: word/character-oriented text comparison utility
 # Copyright (C) 2002-2011 Hisashi MORITA
-# Requirements: Ruby (>= 1.8)
+# Requirements: Ruby (>= 2.0)
+require 'docdiff/difference'
+require 'docdiff/document'
+require 'docdiff/view'
 class DocDiff
   AppVersion = Docdiff::VERSION

data/lib/docdiff.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # DocDiff: word/character-oriented text comparison utility
 # Copyright (C) 2002-2011 Hisashi MORITA
-# Requirements: Ruby (>= 1.8)
+# Requirements: Ruby (>= 2.0)
 require 'docdiff/version'
 require 'doc_diff'
 module Docdiff

data/lib/docdiff/charstring.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 # To use, include to String, or extend String.
 # 2003- Hisashi MORITA
+class DocDiff
 module CharString
   Encodings = {}
@@ -72,9 +73,10 @@ module CharString
     # returns 'CR', 'LF', 'CRLF', 'UNKNOWN'(binary),
     # 'NONE'(1-line), or nil
     return nil if string == nil  #=> nil (argument missing)
-    eol_counts = {'CR'   => string.scan(/(\r)(?!\n)/o).size,
-                  'LF'   => string.scan(/(?:\A|[^\r])(\n)/o).size,
-                  'CRLF' => string.scan(/(\r\n)/o).size}
+    bin_string = string.dup.force_encoding("ASCII-8BIT")
+    eol_counts = {'CR'   => bin_string.scan(/(\r)(?!\n)/o).size,
+                  'LF'   => bin_string.scan(/(?:\A|[^\r])(\n)/o).size,
+                  'CRLF' => bin_string.scan(/(\r\n)/o).size}
     eol_counts.delete_if{|eol, count| count == 0}  # Remove missing EOL
     eols = eol_counts.keys
     eol_variety = eols.size  # numbers of flavors found
@@ -87,10 +89,6 @@ module CharString
     end
   end
-  def CharString.ruby_m17n?
-    "".respond_to?(:force_encoding)
-  end
   # Note that some languages (like Japanese) do not have 'word' or 'phrase',
   # thus some of the following methods are not 'linguistically correct'.
@@ -128,7 +126,6 @@ module CharString
     }.compact.size
   end
-if ruby_m17n?
   # for Ruby-1.9
   def encoding()
     String.new(self).encoding.to_s
@@ -254,280 +251,6 @@ if ruby_m17n?
   require 'docdiff/encoding/ja_eucjp'
   require 'docdiff/encoding/ja_sjis'
   require 'docdiff/encoding/ja_utf8'
-else
-  # for Ruby-1.8
-  require 'iconv'
-  def encoding()
-    @encoding
-#     if @encoding
-#       @encoding
-#     else
-#       @encoding = CharString.guess_encoding(self)
-#       # raise "encoding is not set.\n"
-#     end
-  end
-  def encoding=(cs)
-    @encoding = cs
-    extend Encodings[@encoding]  # ; p "Hey, I extended #{Encodings[@encoding]}!"
-  end
-  # returns nil, 'US-ASCII', 'JIS', 'EUC-JP', 'Shift_JIS', 'UTF-8', or 'UNKNOWN'
-  def CharString.guess_encoding(string)
-    return nil if string == nil
-    result_using_pureruby = CharString.guess_encoding_using_pureruby(string)
-    result_using_iconv    = CharString.guess_encoding_using_iconv(string)
-    if result_using_pureruby == result_using_iconv
-      result_using_pureruby
-    else
-      "UNKNOWN"
-    end
-  end
-  # returns nil, 'US-ASCII', 'JIS', 'EUC-JP', 'Shift_JIS', 'UTF-8', or 'UNKNOWN'
-  def CharString.guess_encoding_using_pureruby(string)
-    return nil if string == nil
-    ascii_pat = '[\x00-\x7f]'
-    jis_pat   = ['(?:(?:\x1b\x28\x42)',
-                 '|(?:\x1b\x28\x4a)',
-                 '|(?:\x1b\x28\x49)',
-                 '|(?:\x1b\x24\x40)',
-                 '|(?:\x1b\x24\x42)',
-                 '|(?:\x1b\x24\x44))'].join
-    eucjp_pat = ['(?:(?:[\x00-\x1f\x7f])',
-                 '|(?:[\x20-\x7e])',
-                 '|(?:\x8e[\xa1-\xdf])',
-                 '|(?:[\xa1-\xfe][\xa1-\xfe])',
-                 '|(?:\x8f[\xa1-\xfe][\xa1-\xfe]))'].join
-    sjis_pat  = ['(?:(?:[\x00-\x1f\x7f])',
-                 '|(?:[\x20-\x7e])',
-                 '|(?:[\xa1-\xdf])',
-                 '|(?:[\x81-\x9f][\x40-\x7e])',
-                 '|(?:[\xe0-\xef][\x80-\xfc]))'].join
-    utf8_pat  = ['(?:(?:[\x00-\x7f])',
-                 '|(?:[\xc0-\xdf][\x80-\xbf])',
-                 '|(?:[\xe0-\xef][\x80-\xbf][\x80-\xbf])',
-                 '|(?:[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]))'].join
-    ascii_match_length = string.scan(/#{ascii_pat}/on).join.length
-    jis_escseq_count   = string.scan(/#{jis_pat}/on).size
-    eucjp_match_length = string.scan(/#{eucjp_pat}/no).join.length
-    sjis_match_length  = string.scan(/#{sjis_pat}/no).join.length
-    utf8_match_length  = string.scan(/#{utf8_pat}/no).join.length
-    case
-    when 0 < jis_escseq_count                 # JIS escape sequense found
-      guessed_encoding = 'JIS'
-    when ascii_match_length == string.length  # every char is ASCII (but not JIS)
-      guessed_encoding = 'US-ASCII'
-    else
-      case
-      when eucjp_match_length < (string.length / 2) &&
-           sjis_match_length  < (string.length / 2) &&
-           utf8_match_length  < (string.length / 2)
-        guessed_encoding = 'UNKNOWN'  # either encoding did not match long enough
-      when (eucjp_match_length < utf8_match_length) &&
-           (sjis_match_length < utf8_match_length)
-        guessed_encoding = 'UTF-8'
-      when (eucjp_match_length < sjis_match_length) &&
-           (utf8_match_length < sjis_match_length)
-        guessed_encoding = 'Shift_JIS'
-      when (sjis_match_length < eucjp_match_length) &&
-           (utf8_match_length < eucjp_match_length)
-        guessed_encoding = 'EUC-JP'
-      else
-        guessed_encoding = 'UNKNOWN'  # cannot guess at all
-      end
-    end
-    return guessed_encoding
-  end
-  def CharString.guess_encoding_using_iconv(string)
-    valid_as_utf8   = CharString.valid_as("utf-8", string)
-    valid_as_sjis   = CharString.valid_as("cp932", string) # not sjis, but cp932
-    valid_as_jis    = CharString.valid_as("iso-2022-jp", string)
-    valid_as_eucjp  = CharString.valid_as("eucjp", string)
-    valid_as_ascii  = CharString.valid_as("ascii", string)
-    invalid_as_utf8   = CharString.invalid_as("utf-8", string)
-    invalid_as_sjis   = CharString.invalid_as("cp932", string) # not sjis, but cp932
-    invalid_as_jis    = CharString.invalid_as("iso-2022-jp", string)
-    invalid_as_eucjp  = CharString.invalid_as("eucjp", string)
-    invalid_as_ascii  = CharString.invalid_as("ascii", string)
-    case
-    when string == nil
-      nil
-    when valid_as_ascii
-      "US-ASCII"
-    when valid_as_jis  # Iconv sometimes recognizes JIS for ASCII, ignoring JIS escape sequence.
-      "JIS"
-    when valid_as_eucjp
-      "EUC-JP"
-    when valid_as_sjis && invalid_as_utf8 && invalid_as_eucjp && invalid_as_jis
-      "Shift_JIS"
-    when valid_as_utf8 && invalid_as_sjis && invalid_as_eucjp && invalid_as_jis
-      "UTF-8"
-    else
-      "UNKNOWN"
-    end
-  end
-  def CharString.valid_as(encoding_name, string)
-    begin
-      Iconv.iconv(encoding_name, encoding_name, string)
-    rescue Iconv::IllegalSequence, Iconv::InvalidCharacter, Iconv::OutOfRange
-      return false
-    else
-      return true
-    end
-  end
-  def CharString.invalid_as(encoding_name, string)
-    if CharString.valid_as(encoding_name, string)
-      false
-    else
-      true
-    end
-  end
-  def split_to_byte()
-    scan(/./nm)
-  end
-  def split_to_char()
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    if eol_char  # sometimes string has no end-of-line char
-      scan(Regexp.new("(?:#{eol_char})|(?:.)",
-                      Regexp::MULTILINE,
-                      encoding.sub(/ASCII/i, 'none'))
-      )
-    else                  # it seems that no EOL module was extended...
-      scan(Regexp.new("(?:.)",
-                      Regexp::MULTILINE,
-                      encoding.sub(/ASCII/i, 'none'))
-      )
-    end
-  end
-  def count_latin_graph_char()
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    scan(Regexp.new("[#{Encodings[encoding]::GRAPH}]",
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    ).size
-  end
-  def count_ja_graph_char()
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    scan(Regexp.new("[#{Encodings[encoding]::JA_GRAPH}]",
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    ).size
-  end
-  def count_latin_blank_char()
-    scan(Regexp.new("[#{Encodings[encoding]::BLANK}]",
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    ).size
-  end
-  def count_ja_blank_char()
-    scan(Regexp.new("[#{Encodings[encoding]::JA_BLANK}]",
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    ).size
-  end
-  def split_to_word()
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    # raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    scan(Regexp.new(Encodings[encoding]::WORD_REGEXP_SRC,
-                    Regexp::MULTILINE,
-                    encoding.sub(/ASCII/i, 'none'))
-    )
-  end
-  def count_latin_word()
-    split_to_word.collect{|word|
-      word if Regexp.new("[#{Encodings[encoding]::PRINT}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/i, 'none')).match word
-    }.compact.size
-  end
-  def count_ja_word()
-    split_to_word.collect{|word|
-      word if Regexp.new("[#{Encodings[encoding]::JA_PRINT}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/i, 'none')).match word
-    }.compact.size
-  end
-  def count_latin_valid_word()
-    split_to_word.collect{|word|
-      word if Regexp.new("[#{Encodings[encoding]::ALNUM}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/i, 'none')).match word
-    }.compact.size
-  end
-  def count_ja_valid_word()
-    split_to_word.collect{|word|
-      word if Regexp.new("[#{Encodings[encoding]::JA_GRAPH}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/i, 'none')).match word
-    }.compact.size
-  end
-  def split_to_line()
-#     scan(Regexp.new(".*?#{eol_char}|.+",
-#                     Regexp::MULTILINE,
-#                     encoding.sub(/ASCII/i, 'none'))
-#     )
-    raise "Encodings[encoding] is #{Encodings[encoding].inspect}: encoding not specified or auto-detection failed." unless Encodings[encoding]
-    raise "EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed." unless EOLChars[eol]
-    if defined? eol_char
-      scan(Regexp.new(".*?#{eol_char}|.+",
-                      Regexp::MULTILINE,
-                      encoding.sub(/ASCII/i, 'none'))
-      )
-    else
-      scan(Regexp.new(".+",
-                      Regexp::MULTILINE,
-                      encoding.sub(/ASCII/i, 'none'))
-      )
-    end
-  end
-  def count_graph_line()
-    split_to_line.collect{|line|
-      line if Regexp.new("[#{Encodings[encoding]::GRAPH}" +
-                         "#{Encodings[encoding]::JA_GRAPH}]",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/, 'none')).match line
-    }.compact.size
-  end
-  def count_blank_line()
-    split_to_line.collect{|line|
-      line if Regexp.new("^[#{Encodings[encoding]::BLANK}" +
-                         "#{Encodings[encoding]::JA_BLANK}]+(?:#{eol_char})?",
-                         Regexp::MULTILINE,
-                         encoding.sub(/ASCII/, 'none')).match line
-    }.compact.size
-  end
-  # load encoding modules
-  require 'docdiff/encoding/en_ascii'
-  require 'docdiff/encoding/ja_eucjp'
-  require 'docdiff/encoding/ja_sjis'
-  require 'docdiff/encoding/ja_utf8'
-end # end ruby_m17n?
   alias to_bytes split_to_byte
   alias to_chars split_to_char
   alias to_words split_to_word
@@ -573,6 +296,7 @@ end # end ruby_m17n?
   end
 end  # module CharString
+end  # class DocDiff
 # class String
 #   include CharString