RubyGems - nekoneko_gen - Versions diffs - 0.1.1 → 0.2.1 - Mend

nekoneko_gen 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

data/lib/nekoneko_gen/arow.rb +22 -38
data/lib/nekoneko_gen/classifier.rb +21 -0
data/lib/nekoneko_gen/classifier_factory.rb +22 -0
data/lib/nekoneko_gen/linear_classifier.rb +86 -0
data/lib/nekoneko_gen/mlp.rb +176 -0
data/lib/nekoneko_gen/pa.rb +68 -0
data/lib/nekoneko_gen/text_classifier_generator.rb +39 -40
data/lib/nekoneko_gen/version.rb +1 -1
data/lib/nekoneko_gen.rb +30 -8
data/test/nekoneko_gen_test.rb +69 -41
metadata +13 -8

data/lib/nekoneko_gen/arow.rb CHANGED Viewed

@@ -1,72 +1,56 @@
 # -*- coding: utf-8 -*-
+require File.expand_path(File.join(File.dirname(__FILE__), 'linear_classifier'))
 module NekonekoGen
-  class Arow
+  # Adaptive Regularization of Weight Vector
+  class Arow < LinearClassifier
     R = 6.0
-    attr_accessor :k, :w
+    DEFAULT_ITERATION = 20
     def initialize(k, options = {})
-      @r = options[:r] || R
+      @r = options[:c] || R
       @k = k
       @cov = []
+      @covb = []
       @w = []
+      @bias = []
       if (@k == 2)
         @cov[0] = Hash.new(1.0)
         @w[0] = Hash.new(0.0)
+        @covb[0] = 1.0
+        @bias[0] = 0.0
       else
         k.times do |i|
           @cov[i] = Hash.new(1.0)
           @w[i] = Hash.new(0.0)
+          @covb[i] = 1.0
+          @bias[i] = 0.0
         end
       end
     end
-    def update(vec, label)
-      loss = 0.0
-      if (@k == 2)
-        loss = update_at(0, vec, label)
-      else
-        nega = rand(@k - 1)
-        if (nega == label)
-          nega += 1
-        end
-        s = 1.0 / @k
-        @k.times do |i|
-          loss += update_at(i, vec, label) * s
-        end
-      end
-      loss
-    end
-    def strip!
-      @w.each do |w|
-        w.reject!{|k,v| v.abs <= Float::EPSILON }
-      end
-      @w
-    end
-    private
-    def dot(vec, w)
-      dot = 0.0
-      vec.each do |k, v|
-        if (a = w[k])
-          dot += a * v
-        end
-      end
-      dot
-    end
     def update_at(i, vec, label)
       w = @w[i]
       cov = @cov[i]
+      covb = @covb[i]
+      bias = @bias[i]
       y = label == i ? 1 : -1
-      score = dot(vec, w)
+      score = bias + dot(vec, w)
       alpha = 1.0 - y * score
       if (alpha > 0.0)
         r_inv= 1.0 / @r
-        var = vec.map {|k, v| cov[k] * v * v }.reduce(:+)
+        var = vec.map{|k, v| cov[k] * v * v }.reduce(:+) + covb
         alpha *= (1.0 / (var + @r)) * y
         vec.each do |k, v|
           w[k] += alpha * cov[k] * v
           cov[k] = 1.0 / ((1.0 / cov[k]) + (v * v * r_inv))
         end
+        @bias[i] += alpha * covb
+        @covb[i] = 1.0 / ((1.0 / covb) + r_inv)
       end
       score * y < 0.0 ? 1.0 : 0.0
     end
+    def default_iteration
+      DEFAULT_ITERATION
+    end
   end
 end

data/lib/nekoneko_gen/classifier.rb ADDED Viewed

@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+module NekonekoGen
+  class Classifier
+    attr_reader :k
+    def parameter_code(index_converter = nil)
+      raise NotImplementedError
+    end
+    def classify_method_code
+      raise NotImplementedError
+    end
+    def update(vec, label)
+      raise NotImplementedError
+    end
+    def features(i = -1)
+      raise NotImplementedError
+    end
+    def default_iteration
+      raise NotImplementedError
+    end
+  end
+end

data/lib/nekoneko_gen/classifier_factory.rb ADDED Viewed

@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+require File.expand_path(File.join(File.dirname(__FILE__), 'arow'))
+require File.expand_path(File.join(File.dirname(__FILE__), 'pa'))
+require File.expand_path(File.join(File.dirname(__FILE__), 'mlp'))
+module NekonekoGen
+  module ClassifierFactory
+    def self.create(k, options)
+      method = options[:method] || :arow
+      case (method)
+      when :arow
+        Arow.new(k, options)
+      when :pa, :pa1, :pa2
+        PA.new(k, options)
+      when :mlp
+        MLP.new(k, options)
+      else
+        raise ArgumentError
+      end
+    end
+  end
+end

data/lib/nekoneko_gen/linear_classifier.rb ADDED Viewed

@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+require 'json'
+require File.expand_path(File.join(File.dirname(__FILE__), 'classifier'))
+module NekonekoGen
+  class LinearClassifier < Classifier
+    attr_reader :w, :bias
+    def dot(vec, w)
+      dot = 0.0
+      vec.each do |k, v|
+        if (a = w[k])
+          dot += a * v
+        end
+      end
+      dot
+    end
+    def strip!
+      @w.each {|w|
+        w.reject!{|k,v|
+          if (v.abs < Float::EPSILON)
+            # p v
+            true
+          else
+            false
+          end
+        }
+      }
+      @w
+    end
+    def update(vec, label)
+      loss = 0.0
+      if (@k == 2)
+        loss = update_at(0, vec, label)
+      else
+        s = 1.0 / @k
+        @k.times do |i|
+          loss += update_at(i, vec, label) * s
+        end
+      end
+      loss
+    end
+    def features(i = -1)
+      if (i < 0)
+        w.reduce(0){|sum, v| sum + v.size }
+      else
+        w[i].size
+      end
+    end
+    def parameter_code(lang, index_converter = lambda{|i| i})
+      lang ||= :ruby
+      case lang
+      when :ruby
+      else
+        raise NotImplementedError
+      end
+      wvec = self.strip!.map {|w|
+        w.reduce({}) {|h, kv| h[index_converter.call(kv[0])] = kv[1]; h }
+      }
+      <<CODE
+  BIAS = #{self.bias.inspect}
+  W = JSON.load(#{wvec.to_json.inspect})
+CODE
+    end
+    def classify_method_code(lang)
+      lang ||= :ruby
+      case lang
+      when :ruby
+      else
+        raise NotImplementedError
+      end
+      <<CODE
+  def self.classify(vec)
+    if (K == 2)
+      BIAS[0] + W[0].values_at(*vec).compact.reduce(0.0, :+) > 0.0 ? 0 : 1
+    else
+      W.each_with_index.map {|w, i|
+        [BIAS[i] + w.values_at(*vec).compact.reduce(0.0, :+), i]
+      }.max.pop
+    end
+  end
+CODE
+    end
+  end
+end

data/lib/nekoneko_gen/mlp.rb ADDED Viewed

@@ -0,0 +1,176 @@
+require 'json'
+require File.expand_path(File.join(File.dirname(__FILE__), 'classifier'))
+module NekonekoGen
+  # Multi Layer Perceptron
+  class MLP < Classifier
+    IR = 0.4
+    HR = 0.1
+    NOISE_VAR = 0.3
+    MARGIN = 0.2
+    DEFAULT_ITERATION = 40
+    def default_hidden_unit
+      @k
+    end
+    def initialize(k, options)
+      @k = k
+      @output_units = @k == 2 ? 1 : @k
+      @hidden_units = (options[:c] || default_hidden_unit).to_i
+      @input = []
+      @hidden = []
+      @input_bias = []
+      @hidden_bias = []
+      @hidden_units.times do |i|
+        @input[i] = Hash.new {|hash, key| hash[key] = default_value }
+        @input_bias[i] = default_value
+      end
+      @output_units.times do |i|
+        @hidden[i] = []
+        @hidden_units.times do |j|
+          @hidden[i][j] = default_value
+        end
+        @hidden_bias[i] = default_value
+      end
+    end
+    def update(vec, label)
+      input_y = []
+      hidden_y = []
+      output_y = []
+      input_y = @hidden_units.times.map do |i|
+        w = @input[i]
+        sigmoid(@input_bias[i] + vec.map{|k, v| w[k] * v}.reduce(:+) + noise)
+      end
+      hidden_y = @output_units.times.map do |i|
+        @hidden_bias[i] + input_y.zip(@hidden[i]).map{|a, b| a * b }.reduce(:+)
+      end
+      output_y = @output_units.times.map do |i|
+        sigmoid(hidden_y[i])
+      end
+      loss = 0.0
+      dotrain = false
+      if (@output_units == 1)
+        if (output_y[0] > 0.5)
+          l = 0
+        else
+          l = 1
+        end
+        if (label == 0)
+          if (output_y[0] < 1.0 - MARGIN)
+            dotrain = true
+          end
+        else
+          if (output_y[0] > MARGIN)
+            dotrain = true
+          end
+        end
+        loss = (label == l) ? 0.0 : 1.0
+      else
+        max_p, l = output_y.each_with_index.max
+        if (l == label)
+          if (max_p < 1.0 - MARGIN)
+            dotrain = true
+          end
+        else
+          loss = 1.0
+          dotrain = true
+        end
+      end
+      if (dotrain)
+        output_bp = @output_units.times.map do |i|
+          y = hidden_y[i]
+          yt = (label == i) ? 1.0 : 0.0
+          expy = Math.exp(y)
+           -((2.0 * yt - 1.0) * expy + yt) / (Math.exp(2.0 * y) + 2.0 * expy + 1.0)
+        end
+        hidden_bp = @hidden_units.times.map do |j|
+          y = 0.0
+          @output_units.times do |i|
+            y += output_bp[i] * @hidden[i][j]
+          end
+          y * (1.0 - input_y[j]) * input_y[j]
+        end
+        @output_units.times do |j|
+          hidden = @hidden[j]
+          @hidden_units.times do |i|
+            hidden[i] -= HR * input_y[i] * output_bp[j]
+          end
+          @hidden_bias[j] -= HR * output_bp[j]
+        end
+        @hidden_units.times do |i|
+          input = @input[i]
+          vec.each do |k, v|
+            input[k] -= IR * v * hidden_bp[i]
+          end
+          @input_bias[i] -= IR * hidden_bp[i]
+        end
+      end
+      loss
+    end
+    def features(i = -1)
+      @input.map{|v| v.size }.reduce(:+)
+    end
+    def sigmoid(a)
+      1.0 / (1.0 + Math.exp(-a))
+    end
+    def default_value
+      (rand - 0.5)
+    end
+    def noise
+      (Math.sqrt(-2.0 * Math.log(rand)) * Math.sin(2.0 * Math::PI * rand)) * NOISE_VAR
+    end
+    def default_iteration
+      DEFAULT_ITERATION
+    end
+    def parameter_code(lang, index_converter = lambda{|i| i})
+      lang ||= :ruby
+      case lang
+      when :ruby
+      else
+        raise NotImplementedError
+      end
+      wvec = @input.map {|w|
+        w.reduce({}) {|h, kv| h[index_converter.call(kv[0])] = kv[1]; h }
+      }
+      <<CODE
+  HIDDEN_UNITS = #{@hidden_units}
+  INPUT_BIAS = #{@input_bias.inspect}
+  HIDDEN_BIAS = #{@hidden_bias.inspect}
+  INPUT_W = JSON.load(#{wvec.to_json.inspect})
+  HIDDEN_W = #{@hidden.inspect}
+CODE
+    end
+    def classify_method_code(lang)
+      lang ||= :ruby
+      case lang
+      when :ruby
+      else
+        raise NotImplementedError
+      end
+      <<CODE
+  def self.classify(vec)
+    input_y = []
+    output_y = []
+    HIDDEN_UNITS.times do |i|
+      input_y[i] = sigmoid(INPUT_BIAS[i] +
+                           INPUT_W[i].values_at(*vec).compact.reduce(0.0, :+))
+    end
+    if (K == 2)
+      HIDDEN_BIAS[0] +
+        input_y.zip(HIDDEN_W[0]).map{|a, b| a * b }.reduce(:+) > 0.0 ? 0 : 1
+    else
+      K.times.map{|i|
+        [HIDDEN_BIAS[i] + input_y.zip(HIDDEN_W[i]).map{|a, b| a * b }.reduce(:+), i]
+      }.max.pop
+    end
+  end
+  def self.sigmoid(a)
+    1.0 / (1.0 + Math.exp(-a))
+  end
+CODE
+    end
+  end
+end

data/lib/nekoneko_gen/pa.rb ADDED Viewed

@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+require File.expand_path(File.join(File.dirname(__FILE__), 'linear_classifier'))
+module NekonekoGen
+  # Passive Agressive
+  class PA < LinearClassifier
+    C = 1.0
+    NORM = 2.0 # norm + BIAS
+    DEFAULT_ITERATION = 20
+    def initialize(k, options = {})
+      @k = k
+      @c = options[:c] || C
+      @w = []
+      @bias = []
+      if (@k == 2)
+        @w[0] = Hash.new(0.0)
+        @bias[0] = 0.0
+      else
+        k.times do |i|
+          @w[i] = Hash.new(0.0)
+          @bias[i] = 0.0
+        end
+      end
+      if options[:method]
+        @tau =
+          case options[:method]
+          when :pa
+            lambda{|y, l| pa(y, l)}
+          when :pa1
+            lambda{|y, l| pa1(y, l)}
+          when :pa2
+            lambda{|y, l| pa2(y, l)}
+          else
+            lambda{|y, l| pa2(y, l)}
+          end
+      else
+        @tau = lambda{|y, l| pa2(y, l)}
+      end
+    end
+    def pa2(y, l)
+      y * (l / NORM + 0.5 / @c)
+    end
+    def pa1(y, l)
+      y * [@c, (l / NORM)].min
+    end
+    def pa(y, l)
+      y * l / NORM
+    end
+    def update_at(i, vec, label)
+      y = label == i ? 1 : -1
+      w = @w[i]
+      score = @bias[i] + dot(vec, w)
+      l = 1.0 - score * y
+      if (l > 0.0)
+        alpha = @tau.call(y, l)
+        vec.each do |k, v|
+          w[k] += alpha * v
+        end
+        @bias[i] += alpha
+      end
+      y * score < 0.0 ? 1.0 : 0.0
+    end
+    def default_iteration
+      DEFAULT_ITERATION
+    end
+  end
+end

data/lib/nekoneko_gen/text_classifier_generator.rb CHANGED Viewed

@@ -1,9 +1,8 @@
 # -*- coding: utf-8 -*-
-require 'json'
 require 'nkf'
 require 'bimyou_segmenter'
-require File.expand_path(File.join(File.dirname(__FILE__), 'arow'))
+require File.expand_path(File.join(File.dirname(__FILE__), 'classifier_factory'))
 module NekonekoGen
   class TextClassifierGenerator
@@ -15,16 +14,15 @@ module NekonekoGen
       @files = files
       @word2id = {}
       @id2word = {}
-      @arow = Arow.new(files.size, options)
+      @classifier = ClassifierFactory.create(files.size, options)
       @name = safe_name(@filename).split("_").map(&:capitalize).join
       @labels = files.map {|file| "#{safe_name(file).upcase}"}
     end
-    def train(iteration = 20)
-      iteration ||= 20
+    def train(iteration = nil)
+      iteration ||= @classifier.default_iteration
       data = []
-      @arow.k.times do |i|
+      @classifier.k.times do |i|
         t = Time.now
         data[i] = []
         print "loading #{@files[i]}... "
@@ -49,31 +47,35 @@ module NekonekoGen
         t = Time.now
         print sprintf("step %3d...", step)
-        @arow.k.times.map do |i|
+        @classifier.k.times.map do |i|
           sampling(data[i], samples).map {|vec| [vec, i] }
         end.flatten(1).shuffle!.each do |v|
-          loss += @arow.update(v[0], v[1])
+          loss += @classifier.update(v[0], v[1])
           c += 1
         end
         print sprintf(" %.6f, %.4fs\n", 1.0 - loss / c.to_f, Time.now - t)
       end
-      @arow.strip!
-      if (@arow.k > 2)
-        @arow.w.each_with_index do |w, i|
-          puts "#{@labels[i]} : #{w.size} features"
+      if (@classifier.k > 2)
+        @classifier.k.times do |i|
+          puts "#{@labels[i]} : #{@classifier.features(i)} features"
         end
       else
-        puts "#{@labels[0]}, #{@labels[1]} : #{@arow.w[0].size} features"
+        puts "#{@labels[0]}, #{@labels[1]} : #{@classifier.features(0)} features"
       end
       puts "done nyan! "
     end
-    def generate
-      wv = @arow.w.map {|w|
-        w.reduce({}) {|h, kv| h[id2word(kv[0])] = kv[1]; h }
-      }
+    def generate(lang = :ruby)
+      lang ||= :ruby
+      case lang
+      when :ruby
+        generate_ruby_code
+      else
+        raise NotImplementedError
+      end
+      @name
+    end
+    def generate_ruby_code
       labels = @labels.each_with_index.map{|v, i| "  #{v} = #{i}"}.join("\n")
       File.open(@filename, "w") do |f|
         f.write <<MODEL
 # -*- coding: utf-8 -*-
@@ -82,9 +84,21 @@ require 'json'
 require 'bimyou_segmenter'
 class #{@name}
+  def self.k
+    K
+  end
   def self.predict(text)
+    classify(fv(text))
+  end
+#{labels}
+  LABELS = #{@labels.inspect}
+  K = #{@classifier.k}
+  private
+  def self.fv(text)
     prev = nil
-    vec = BimyouSegmenter.segment(text).map do |word|
+    BimyouSegmenter.segment(text).map do |word|
       if (prev)
         if (NGRAM_TARGET =~ word)
           nword = [prev + word, word]
@@ -101,27 +115,14 @@ class #{@name}
         word
       end
     end.flatten(1)
-    vec << " bias "
-    if (W.size == 1)
-      W[0].values_at(*vec).compact.reduce(:+) > 0.0 ? 0 : 1
-    else
-      W.each_with_index.map {|w,i|
-        [w.values_at(*vec).compact.reduce(:+), i]
-      }.max.pop
-    end
   end
-  def self.k
-    W.size == 1 ? 2 : W.size
-  end
-#{labels}
-  LABELS = #{@labels.inspect}
+#{@classifier.classify_method_code(:ruby)}
-  private
   NGRAM_TARGET = Regexp.new('(^[ァ-ヾ]+$)|(^[a-zA-Z\\-_ａ-ｚＡ-Ｚ‐＿0-9０-９]+$)|' +
                          '(^[々〇ヵヶ' + [0x3400].pack('U') + '-' + [0x9FFF].pack('U') +
                          [0xF900].pack('U') + '-' + [0xFAFF].pack('U') +
-                         [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']+$)')
-  W = JSON.load(#{wv.to_json.inspect})
+                            [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']+$)')
+#{@classifier.parameter_code(:ruby, lambda{|id| id2word(id) })}
 end
 MODEL
       end
@@ -143,8 +144,6 @@ MODEL
     end
     def fv(text)
       vec = Hash.new(0)
-      vec[word2id(" bias ")] = 1
       prev = nil
       words = BimyouSegmenter.segment(text, :white_space => true).map do |word|
         if (prev)
@@ -170,7 +169,7 @@ MODEL
       vec
     end
     def normalize(vec)
-      norm = Math.sqrt(vec.each_value.reduce(0){|a, v| a + v * v })
+      norm = Math.sqrt(vec.values.map{|v| v * v }.reduce(:+))
       if (norm > 0.0)
         s = 1.0 / norm
         vec.each do |k, v|

data/lib/nekoneko_gen/version.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 module NekonekoGen
-  VERSION = "0.1.1"
+  VERSION = "0.2.1"
 end

data/lib/nekoneko_gen.rb CHANGED Viewed

@@ -5,29 +5,51 @@ require 'optparse'
 require 'fileutils'
 module NekonekoGen
-  DEFAULT_ITERATION = 20
   def self.run(argv)
-    iteration = DEFAULT_ITERATION
+    iteration = nil
     rubyfile = nil
     quiet = false
-    $stdout.sync = true
+    $stdout.sync = true
+    method = nil
+    c = nil
     opt = OptionParser.new do |o|
       o.on('-n NAME', 'new classifier name') do |v|
         rubyfile = File.join(File.dirname(v), File.basename(v, ".*") + ".rb")
         FileUtils.touch(rubyfile)
       end
-      o.on('-i N', "iteration count (default: #{DEFAULT_ITERATION})") do |v|
+      o.on('-i N', "iteration (default: auto)") do |v|
         iteration = v.to_i.abs
       end
+      o.on('-m METHOD', "machine learning method [AROW|PA2|MLP] (default AROW)") do |v|
+        if (v)
+          case v.downcase
+          when 'arow'
+            method = :arow
+          when 'pa1'
+            method = :pa1
+          when 'pa2'
+            method = :pa2
+          when 'mlp'
+            method = :mlp
+          else
+            warn opt
+            return -1
+          end
+        else
+          warn opt
+          return -1
+        end
+      end
+      o.on('-p C', "parameter (default AROW::R=6.0, PA2::C=1.0, MLP::HIDDEN_UNIT=K)") do |v|
+        c = v.to_f
+      end
       o.on('-q', "quiet") do
         quiet = true
       end
     end
     opt.version = NekonekoGen::VERSION
-    opt.banner = "Usage: nekoneko_gen -n classifier_name file1 file2 [files...]"
+    opt.banner = "Usage: nekoneko_gen [OPTIONS] -n NAME FILE1 FILE2 [FILES...]"
     files = opt.parse(argv)
     unless (rubyfile)
@@ -45,7 +67,7 @@ module NekonekoGen
       end
     end
-    gen = NekonekoGen::TextClassifierGenerator.new(rubyfile, files)
+    gen = NekonekoGen::TextClassifierGenerator.new(rubyfile, files, {:method => method, :c => c})
     if (quiet)
       gen.quiet = true
     end

data/test/nekoneko_gen_test.rb CHANGED Viewed

@@ -6,118 +6,146 @@ class NekonekoGenTest < Test::Unit::TestCase
     @file0 = File.join(File.dirname(__FILE__), 'class0.txt')
     @file1 = File.join(File.dirname(__FILE__), 'class1.txt')
     @file2 = File.join(File.dirname(__FILE__), 'class2.txt')
-    @output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_classifier.rb")
-    @output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_classifier.rb")
+    @clean_files = []
   end
   def teardown
-    cleanup!
+    @clean_files.each do |file|
+      if (File.exist?(file))
+        File.unlink(file)
+      end
+    end
   end
-  def cleanup!
-    begin
-      File.unlink(@output_file2)
-    rescue
+  def test_mlp
+    gen2('mlp', {:method => :mlp})
+    gen3('mlp', {:method => :mlp})
+  end
+  def test_pa2
+    gen2('pa2', {:method => :pa2})
+    gen3('pa2', {:method => :pa2})
+  end
+  def test_arow
+    gen2('arow', {:method => :arow})
+    gen3('arow',{:method => :arow})
+  end
+  def clean!(a, b)
+    if (File.exist?(a))
+      File.unlink(a)
     end
-    begin
-      File.unlink(@output_file3)
-    rescue
+    if (File.exist?(b))
+      File.unlink(b)
     end
-  end
+  end
-  def test_gen2
-    cleanup!
+  def gen2(prefix, options)
+    p "---- #{prefix} generate 2class"
+    output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_#{prefix}_classifier.rb")
+    output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_#{prefix}_classifier.rb")
-    gen = NekonekoGen::TextClassifierGenerator.new(@output_file2, [@file0, @file1])
-    #gen.quiet = true
-    gen.train(NekonekoGen::DEFAULT_ITERATION)
-    gen.generate
+    clean!(output_file2, output_file3)
+    @clean_files << output_file2
+    @clean_files << output_file3
-    unless (File.exist?(@output_file2))
-      assert_equal "#{@output_file2} not found", nil
+    gen = NekonekoGen::TextClassifierGenerator.new(output_file2, [@file0, @file1], options)
+    gen.train
+    modname = gen.generate
+    unless (File.exist?(output_file2))
+      assert_equal "#{output_file2} not found", nil
     end
     begin
-      load @output_file2
+      load output_file2
+      mod = Kernel.const_get(modname)
       ok = 0
       count = 0
       File.open(@file0) do |f|
         until f.eof?
-          if (NekonekoTest2Classifier.predict(f.readline) == NekonekoTest2Classifier::CLASS0)
+          if (mod.predict(f.readline) == mod::CLASS0)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest2Classifier::LABELS[0]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[0]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
       ok = 0
       count = 0
       File.open(@file1) do |f|
         until f.eof?
-          if (NekonekoTest2Classifier.predict(f.readline) == NekonekoTest2Classifier::CLASS1)
+          if (mod.predict(f.readline) == mod::CLASS1)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest2Classifier::LABELS[1]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[1]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
     end
   end
-  def test_gen3
-    cleanup!
+  def gen3(prefix, options)
+    p "---- #{prefix} generate 3class"
+    output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_#{prefix}_classifier.rb")
+    output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_#{prefix}_classifier.rb")
+    clean!(output_file2, output_file3)
+    @clean_files << output_file2
+    @clean_files << output_file3
-    gen = NekonekoGen::TextClassifierGenerator.new(@output_file3, [@file0, @file1, @file2])
-    #gen.quiet = true
-    gen.train(NekonekoGen::DEFAULT_ITERATION)
-    gen.generate
+    gen = NekonekoGen::TextClassifierGenerator.new(output_file3,
+                                                   [@file0, @file1, @file2], options)
+    gen.train
+    modname = gen.generate
-    unless (File.exist?(@output_file3))
-      assert_equal "#{@output_file3} not found", nil
+    unless (File.exist?(output_file3))
+      assert_equal "#{output_file3} not found", nil
     end
     begin
-      load @output_file3
+      load output_file3
+      mod = Kernel.const_get(modname)
       ok = 0
       count = 0
       File.open(@file0) do |f|
         until f.eof?
-          if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS0)
+          if (mod.predict(f.readline) == mod::CLASS0)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest3Classifier::LABELS[0]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[0]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
       ok = 0
       count = 0
       File.open(@file1) do |f|
         until f.eof?
-          if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS1)
+          if (mod.predict(f.readline) == mod::CLASS1)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest3Classifier::LABELS[1]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[1]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
       ok = 0
       count = 0
       File.open(@file2) do |f|
         until f.eof?
-          if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS2)
+          if (mod.predict(f.readline) == mod::CLASS2)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest3Classifier::LABELS[2]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[2]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
     end
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: nekoneko_gen
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.1
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-29 00:00:00.000000000Z
+date: 2012-06-01 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bimyou_segmenter
-  requirement: &7671220 !ruby/object:Gem::Requirement
+  requirement: &14306440 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *7671220
+  version_requirements: *14306440
 - !ruby/object:Gem::Dependency
   name: json
-  requirement: &7668960 !ruby/object:Gem::Requirement
+  requirement: &14304220 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *7668960
+  version_requirements: *14304220
 - !ruby/object:Gem::Dependency
   name: test-unit
-  requirement: &7641320 !ruby/object:Gem::Requirement
+  requirement: &14303060 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *7641320
+  version_requirements: *14303060
 description: Japanese Text Classifier Generator
 email:
 - nagadomi@nurs.or.jp
@@ -60,6 +60,11 @@ files:
 - bin/nekoneko_gen
 - lib/nekoneko_gen.rb
 - lib/nekoneko_gen/arow.rb
+- lib/nekoneko_gen/classifier.rb
+- lib/nekoneko_gen/classifier_factory.rb
+- lib/nekoneko_gen/linear_classifier.rb
+- lib/nekoneko_gen/mlp.rb
+- lib/nekoneko_gen/pa.rb
 - lib/nekoneko_gen/text_classifier_generator.rb
 - lib/nekoneko_gen/version.rb
 - nekoneko_gen.gemspec