RubyGems - tefil - Versions diffs - 0.1.4 → 0.1.5 - Mend

tefil 0.1.4 → 0.1.5

Files changed (30)

checksums.yaml +4 -4
data/CHANGES +10 -1
data/VERSION +1 -1
data/bin/columnform +1 -0
data/bin/linesplit +48 -0
data/example/linesplit/run.sh +6 -0
data/example/linesplit/sample1.txt +1 -0
data/example/linesplit/sample2.txt +2 -0
data/example/linesplit/sample3.txt +2 -0
data/example/linesplit/sample4.txt +5 -0
data/lib/tefil.rb +1 -2
data/lib/tefil/columnformer.rb +5 -0
data/lib/tefil/fswikitomd.rb +11 -1
data/lib/tefil/linesplitter.rb +50 -0
data/lib/tefil/mdtofswiki.rb +5 -5
data/lib/tefil/textfilterbase.rb +9 -0
data/tefil.gemspec +12 -11
data/test/test_columnformer.rb +20 -3
data/test/test_fswikitomd.rb +130 -80
data/test/test_linesplitter.rb +129 -0
data/test/test_mdtofswiki.rb +55 -30
data/test/test_textfilterbase.rb +11 -3
metadata +11 -11
data/bin/eachsentence +0 -24
data/bin/statistics +0 -24
data/example/eachsentence/sample.txt +0 -14
data/lib/tefil/eachsentence.rb +0 -35
data/lib/tefil/statistics.rb +0 -38
data/test/test_eachsentence.rb +0 -123
data/test/test_statistics.rb +0 -38

data/test/test_linesplitter.rb ADDED

@@ -0,0 +1,129 @@
+#! /usr/bin/env ruby
+# coding: utf-8
+require "helper"
+require "stringio"
+# 元々の行末は保存する。
+# 消すようにすると、空行などの処理が面倒になる。
+class TC_LineSplitter < Test::Unit::TestCase
+  def setup
+    @test00 = Tefil::LineSplitter.new(separators: %w(.))
+  end
+  def test_process_stream
+    # divide
+    in_io = StringIO.new
+    in_io.puts "Abc def. Ghi jhk."
+    in_io.rewind
+    out_io = StringIO.new
+    @test00.process_stream(in_io, out_io)
+    out_io.rewind
+    result = out_io.read
+    correct = "Abc def.\n Ghi jhk.\n"
+    assert_equal(correct, result)
+    # 行末の保存
+    in_io = StringIO.new
+    in_io.puts "Abc def\nGhi jhk."
+    in_io.rewind
+    out_io = StringIO.new
+    @test00.process_stream(in_io, out_io)
+    out_io.rewind
+    result = out_io.read
+    correct = "Abc def\nGhi jhk.\n"
+    assert_equal(correct, result)
+    # indent
+    in_io = StringIO.new
+    in_io.puts "  Abc def. Ghi jhk.\n"
+    in_io.rewind
+    out_io = StringIO.new
+    @test00.process_stream(in_io, out_io)
+    out_io.rewind
+    result = out_io.read
+    correct = "  Abc def.\n Ghi jhk.\n"
+    assert_equal(correct, result)
+    # empty line
+    in_io = StringIO.new
+    in_io.puts "Abc def.\n\nGhi jhk.\n"
+    in_io.rewind
+    out_io = StringIO.new
+    @test00.process_stream(in_io, out_io)
+    out_io.rewind
+    result = out_io.read
+    correct = "Abc def.\n\n\nGhi jhk.\n"
+    assert_equal(correct, result)
+    # Fig.
+    in_io = StringIO.new
+    in_io.puts "Including Fig. 3. Fig. 3? Fig.\n4 does' not exist."
+    in_io.rewind
+    out_io = StringIO.new
+    @test00.process_stream(in_io, out_io)
+    out_io.rewind
+    result = out_io.read
+    correct = "Including Fig.\n 3.\n Fig.\n 3? Fig.\n\n4 does' not exist.\n"
+    assert_equal(correct, result)
+  end
+  def test_process_stream_strip
+    # strip
+    test10 = Tefil::LineSplitter.new(separators: %w(.), indent_mode: :strip)
+    in_io = StringIO.new
+    in_io.puts "  Abc def. Ghi jhk.\n"
+    in_io.rewind
+    out_io = StringIO.new
+    test10.process_stream(in_io, out_io)
+    out_io.rewind
+    result = out_io.read
+    correct = "Abc def.\nGhi jhk.\n"
+    assert_equal(correct, result)
+  end
+  #def test_process_stream_indent
+  #  # strip
+  #  test10 = Tefil::LineSplitter.new(separators: %w(.), indent_mode: :indent)
+  #  in_io = StringIO.new
+  #  in_io.puts "  Abc def. Ghi jhk.\n"
+  #  in_io.rewind
+  #  out_io = StringIO.new
+  #  test10.process_stream(in_io, out_io)
+  #  out_io.rewind
+  #  result = out_io.read
+  #  correct = "  Abc def.\n  Ghi jhk.\n"
+  #  assert_equal(correct, result)
+  #end
+  def test_process_stream_except
+    # except
+    in_io = StringIO.new
+    in_io.puts "Including Fig.3. Fig. 3? Fig.\n4 does' not exist.\n"
+    in_io.rewind
+    out_io = StringIO.new
+    test10 = Tefil::LineSplitter.new(separators: ["."], except_words: ["FIG.", "Fig."])
+    test10.process_stream(in_io, out_io)
+    out_io.rewind
+    result = out_io.read
+    correct = "Including Fig.3.\n Fig. 3? Fig.\n4 does' not exist.\n"
+    assert_equal(correct, result)
+  end
+  def test_process_stream_japanese
+    # Japanese kutouten
+    test10 = Tefil::LineSplitter.new(separators: %w(. 。))
+    in_io = StringIO.new
+    in_io.puts "あいうえお。かき\nくけこ。"
+    in_io.rewind
+    out_io = StringIO.new
+    test10.process_stream(in_io, out_io)
+    out_io.rewind
+    result = out_io.read
+    correct = "あいうえお。\nかき\nくけこ。\n"
+    assert_equal(correct, result)
+  end
+end

data/test/test_mdtofswiki.rb CHANGED

@@ -5,7 +5,7 @@ require "helper"
 require "stringio"
 class Tefil::MdToFswiki
-  public :process_stream
+  public :process_stream, :process_string
 end
 class TC_MdToFswiki < Test::Unit::TestCase
@@ -14,39 +14,64 @@ class TC_MdToFswiki < Test::Unit::TestCase
   end
   def test_process_stream
-    #pp "##aho".sub!(/^\#\#\#/  , '')
-    #exit
-    [
-      [ "# head1"                           , "!!! head1"                         ],
-      [ "## head2"                          , "!! head2"                          ],
-      [ "### head3"                         , "! head3"                           ],
-      [ "abc *italic* def"                  , "abc ''italic'' def"                ],
+    assert_equal("!!! head1"          , @f00.process_string(  "# head1"            ))
+    assert_equal("!! head2"           , @f00.process_string(  "## head2"           ))
+    assert_equal("! head3"            , @f00.process_string(  "### head3"          ))
+    assert_equal("abc ''italic'' def" , @f00.process_string(  "abc *italic* def"   ))
+    assert_equal("* item"             , @f00.process_string(  "* item"             ))
+    assert_equal("** item"            , @f00.process_string(  "  * item"           ))
+    assert_equal("*** item"           , @f00.process_string(  "    * item"         ))
+    assert_equal("**** item"          , @f00.process_string(  "      * item"       ))
+    assert_equal("+ enum"             , @f00.process_string(  "0. enum"            ))
+    assert_equal("++ enum"            , @f00.process_string(  "  0. enum"          ))
+    assert_equal("+++ enum"           , @f00.process_string(  "    0. enum"        ))
+    assert_equal("++++ enum"          , @f00.process_string(  "      0. enum"      ))
+    assert_equal("64 str"            , @f00.process_string(  "64 str"          ))
+    assert_equal(" formatted text"    , @f00.process_string(  "    formatted text" ))
+    assert_equal("----"               , @f00.process_string(  "---"                ))
+    assert_equal("[Google|http://www.google.co.jp/]", @f00.process_string(  "[Google](http://www.google.co.jp/)"))
       #[ "abc **bold** def"                  , "abc '''bold''' def"                ],
       #[                                     , "abc ==strike== def"               ],
       #[                                     , "abc __underline__ def"            ],
       #[                                     , '"" quotation'                     ],
-      [ "* item"                            , "* item"                            ],
-      [ "  * item"                          , "** item"                           ],
-      [ "    * item"                        , "*** item"                          ],
-      [ "      * item"                      , "**** item"                         ],
-      [ "0. enum"                           , "+ enum"                            ],
-      [ "  0. enum"                         , "++ enum"                           ],
-      [ "    0. enum"                       , "+++ enum"                          ],
-      [ "      0. enum"                     , "++++ enum"                         ],
       #[                                     , "*http://www.yahoo.co.jp/"         ],
-      [ "[Google](http://www.google.co.jp/)", "[Google|http://www.google.co.jp/]" ],
-      [ "    formatted text"                , " formatted text"                   ],
-      [ "---"                               , "----"                              ],
-      #[ "<!-- comment-->"                   , "// comment"                        ],
-    ].each do |i|
-      $stdin = StringIO.new
-      $stdin.puts i[0]
-      $stdin.rewind
-      #str = capture_stdout{}
-      result = capture_stdout{ @f00.filter([])}
-      correct = sprintf("#{i[1]}\n")
-      assert_equal(correct, result)
-    end
+    #[
+    #  [ "# head1"                           , "!!! head1"                         ],
+    #  [ "## head2"                          , "!! head2"                          ],
+    #  [ "### head3"                         , "! head3"                           ],
+    #  [ "abc *italic* def"                  , "abc ''italic'' def"                ],
+    #  #[ "abc **bold** def"                  , "abc '''bold''' def"                ],
+    #  #[                                     , "abc ==strike== def"               ],
+    #  #[                                     , "abc __underline__ def"            ],
+    #  #[                                     , '"" quotation'                     ],
+    #  [ "* item"                            , "* item"                            ],
+    #  [ "  * item"                          , "** item"                           ],
+    #  [ "    * item"                        , "*** item"                          ],
+    #  [ "      * item"                      , "**** item"                         ],
+    #  [ "0. enum"                           , "+ enum"                            ],
+    #  [ "  0. enum"                         , "++ enum"                           ],
+    #  [ "    0. enum"                       , "+++ enum"                          ],
+    #  [ "      0. enum"                     , "++++ enum"                         ],
+    #  #[                                     , "*http://www.yahoo.co.jp/"         ],
+    #  [ "[Google](http://www.google.co.jp/)", "[Google|http://www.google.co.jp/]" ],
+    #  [ "    formatted text"                , " formatted text"                   ],
+    #  [ "---"                               , "----"                              ],
+    #  #[ "<!-- comment-->"                   , "// comment"                        ],
+    #].each do |i|
+    #  $stdin = StringIO.new
+    #  $stdin.puts i[0]
+    #  $stdin.rewind
+    #  #str = capture_stdout{}
+    #  result = capture_stdout{ @f00.filter([])}
+    #  correct = sprintf("#{i[1]}\n")
+    #  assert_equal(correct, result)
+    #end
   end
 end

data/test/test_textfilterbase.rb CHANGED

@@ -9,10 +9,12 @@ require "tempfile"
 require "fileutils"
 class SampleFilter < Tefil::TextFilterBase
-  def process_stream(in_file, out_file)
+  public :process_string
+  def process_stream(in_io, out_io)
     #results = []
-    in_file.each do |line|
-      out_file.puts line.sub('a', 'A')
+    in_io.each do |line|
+      out_io.print line.sub('a', 'A')
     end
   end
 end
@@ -134,6 +136,12 @@ class TestTefil < Test::Unit::TestCase
     assert_equal(["Abc\n", "def\n", "cAb\n"], tmp)
   end
+  def test_process_string
+    result = @t00.process_string("abc\naabbcc")
+    correct = "Abc\nAabbcc"
+    assert_equal(correct, result)
+  end
 #  def test_self_filter
 #    $stdin = StringIO.new
 #    $stdin.puts "abc"

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tefil
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - ippei94da
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-05-31 00:00:00.000000000 Z
+date: 2018-05-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: test-unit
@@ -115,14 +115,13 @@ executables:
 - calc
 - columnanalyze
 - columnform
-- eachsentence
 - fswiki2md
 - indentconv
 - indentstat
+- linesplit
 - linesub
 - md2fswiki
 - percentpack
-- statistics
 - zshescape
 extensions: []
 extra_rdoc_files:
@@ -139,14 +138,13 @@ files:
 - bin/calc
 - bin/columnanalyze
 - bin/columnform
-- bin/eachsentence
 - bin/fswiki2md
 - bin/indentconv
 - bin/indentstat
+- bin/linesplit
 - bin/linesub
 - bin/md2fswiki
 - bin/percentpack
-- bin/statistics
 - bin/zshescape
 - doc/memo.txt
 - example/calc/run.zsh
@@ -155,26 +153,29 @@ files:
 - example/columnanalyze/run.zsh
 - example/columnformer/indent.txt
 - example/columnformer/sample.txt
-- example/eachsentence/sample.txt
 - example/indentconv/sample0.txt
 - example/indentconv/sample1.txt
 - example/indentstat/indent4.txt
 - example/indentstat/sample0.txt
 - example/indentstat/sample1.txt
+- example/linesplit/run.sh
+- example/linesplit/sample1.txt
+- example/linesplit/sample2.txt
+- example/linesplit/sample3.txt
+- example/linesplit/sample4.txt
 - example/percentpack/sample.txt
 - example/zshescape/sample.txt
 - lib/tefil.rb
 - lib/tefil/calculator.rb
 - lib/tefil/columnanalyzer.rb
 - lib/tefil/columnformer.rb
-- lib/tefil/eachsentence.rb
 - lib/tefil/fswikitomd.rb
 - lib/tefil/indentconverter.rb
 - lib/tefil/indentstatistics.rb
+- lib/tefil/linesplitter.rb
 - lib/tefil/linesubstituter.rb
 - lib/tefil/mdtofswiki.rb
 - lib/tefil/percentpacker.rb
-- lib/tefil/statistics.rb
 - lib/tefil/textfilterbase.rb
 - lib/tefil/zshescaper.rb
 - tefil.gemspec
@@ -185,14 +186,13 @@ files:
 - test/test_calculator.rb
 - test/test_columnanalyzer.rb
 - test/test_columnformer.rb
-- test/test_eachsentence.rb
 - test/test_fswikitomd.rb
 - test/test_indentconverter.rb
 - test/test_indentstatistics.rb
+- test/test_linesplitter.rb
 - test/test_linesubstituter.rb
 - test/test_mdtofswiki.rb
 - test/test_percentpacker.rb
-- test/test_statistics.rb
 - test/test_textfilterbase.rb
 - test/test_zshescaper.rb
 homepage: http://github.com/ippei94da/tefil

data/bin/eachsentence DELETED

@@ -1,24 +0,0 @@
-#! /usr/bin/env ruby
-# coding: utf-8
-# convert to text with each line / each sentence.
-require "pp"
-require "optparse"
-require "rubygems"
-require "tefil"
-# option analysis
-options = {}
-op = OptionParser.new
-#op.banner = [
-#  "Usage: #{File.basename("#{__FILE__}")} [options] [files]",
-#].join("\n")
-op.on("-o"     , "--overwrite"     , "Overwrite."){    options[:overwrite] = true}
-op.parse!(ARGV)
-options[:overwrite] ||= false
-is = Tefil::EachSentence.new options
-is.filter(ARGV)

data/bin/statistics DELETED

@@ -1,24 +0,0 @@
-#! /usr/bin/env ruby
-# coding: utf-8
-require "pp"
-require "optparse"
-require "rubygems"
-require "tefil"
-# option analysis
-options = {}
-op = OptionParser.new
-#op.banner = [
-#  "Usage: #{File.basename("#{__FILE__}")} [options] [files]",
-#].join("\n")
-op.on("-o"     , "--overwrite"     , "Overwrite."){    options[:overwrite] = true}
-op.parse!(ARGV)
-options[:overwrite] ||= false
-stat = Tefil::Statistics.new options
-stat.filter(ARGV)

data/example/eachsentence/sample.txt DELETED

@@ -1,14 +0,0 @@
-After the loop is broken, a pair of nodes, comprising an OSS
-and its qe-OSS, is obtained; OSS $(s_4^{-}, s_2)$ and
-$(s_2, s_4)$ in Fig. \ref{fig1}.  Each of the OSSs can be
-elevated to $\vstart$ by using its stored data. An MEP that
-connects $\vstart$ and qe-OSS of $\vstart$.
-あいうえお。かき
-くけこ。
-あいうえお．かき
-くけこ．
-あいうえお、かき
-くけこ、
-あいうえお，かき
-くけこ，

data/lib/tefil/eachsentence.rb DELETED

@@ -1,35 +0,0 @@
-# coding: utf-8
-class Tefil::EachSentence < Tefil::TextFilterBase
-  END_CHAR = %w(. ? ． 。)
-  NOT_END_WORDS = ["Fig.", "FIG."]
-  def initialize(options = {})
-    options[:smart_filename] = true
-    @minimum = options[:minimum]
-    super(options)
-  end
-  def process_stream(in_io, out_io)
-    results = []
-    #words = []
-    in_io.read.strip.split("\n").each do |line|
-      new_line = ''
-      #line.gsub!("\n", ' ')
-      line.chars.each do |char|
-        new_line += char
-        new_line += "\n" if (END_CHAR.include?(char))
-      end
-      NOT_END_WORDS.each do |word|
-        new_line.gsub!(/#{word}\n/, word)
-      end
-      new_line.gsub!(/\n  */, "\n")
-      new_line.strip!
-      new_line.gsub!(/  */, " ")
-      results << new_line
-    end
-    out_io.puts results.join("\n")
-  end
-end