RubyGems - nekoneko_gen - Versions diffs - 0.1.1 → 0.2.1 - Mend

nekoneko_gen 0.1.1 → 0.2.1

Files changed (11)

data/lib/nekoneko_gen/arow.rb +22 -38
data/lib/nekoneko_gen/classifier.rb +21 -0
data/lib/nekoneko_gen/classifier_factory.rb +22 -0
data/lib/nekoneko_gen/linear_classifier.rb +86 -0
data/lib/nekoneko_gen/mlp.rb +176 -0
data/lib/nekoneko_gen/pa.rb +68 -0
data/lib/nekoneko_gen/text_classifier_generator.rb +39 -40
data/lib/nekoneko_gen/version.rb +1 -1
data/lib/nekoneko_gen.rb +30 -8
data/test/nekoneko_gen_test.rb +69 -41
metadata +13 -8

data/lib/nekoneko_gen/arow.rb CHANGED Viewed

@@ -1,72 +1,56 @@
 # -*- coding: utf-8 -*-
+require File.expand_path(File.join(File.dirname(__FILE__), 'linear_classifier'))
 module NekonekoGen
-  class Arow
+  # Adaptive Regularization of Weight Vector
+  class Arow < LinearClassifier
     R = 6.0
-    attr_accessor :k, :w
+    DEFAULT_ITERATION = 20
     def initialize(k, options = {})
-      @r = options[:r] || R
+      @r = options[:c] || R
       @k = k
       @cov = []
+      @covb = []
       @w = []
+      @bias = []
       if (@k == 2)
         @cov[0] = Hash.new(1.0)
         @w[0] = Hash.new(0.0)
+        @covb[0] = 1.0
+        @bias[0] = 0.0
       else
         k.times do |i|
           @cov[i] = Hash.new(1.0)
           @w[i] = Hash.new(0.0)
+          @covb[i] = 1.0
+          @bias[i] = 0.0
         end
       end
     end
-    def update(vec, label)
-      loss = 0.0
-      if (@k == 2)
-        loss = update_at(0, vec, label)
-      else
-        nega = rand(@k - 1)
-        if (nega == label)
-          nega += 1
-        end
-        s = 1.0 / @k
-        @k.times do |i|
-          loss += update_at(i, vec, label) * s
-        end
-      end
-      loss
-    end
-    def strip!
-      @w.each do |w|
-        w.reject!{|k,v| v.abs <= Float::EPSILON }
-      end
-      @w
-    end
-    private
-    def dot(vec, w)
-      dot = 0.0
-      vec.each do |k, v|
-        if (a = w[k])
-          dot += a * v
-        end
-      end
-      dot
-    end
     def update_at(i, vec, label)
       w = @w[i]
       cov = @cov[i]
+      covb = @covb[i]
+      bias = @bias[i]
       y = label == i ? 1 : -1
-      score = dot(vec, w)
+      score = bias + dot(vec, w)
       alpha = 1.0 - y * score
       if (alpha > 0.0)
         r_inv= 1.0 / @r
-        var = vec.map {|k, v| cov[k] * v * v }.reduce(:+)
+        var = vec.map{|k, v| cov[k] * v * v }.reduce(:+) + covb
         alpha *= (1.0 / (var + @r)) * y
         vec.each do |k, v|
           w[k] += alpha * cov[k] * v
           cov[k] = 1.0 / ((1.0 / cov[k]) + (v * v * r_inv))
         end
+        @bias[i] += alpha * covb
+        @covb[i] = 1.0 / ((1.0 / covb) + r_inv)
       end
       score * y < 0.0 ? 1.0 : 0.0
     end
+    def default_iteration
+      DEFAULT_ITERATION
+    end
   end
 end

data/lib/nekoneko_gen/classifier.rb ADDED Viewed

@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+module NekonekoGen
+  class Classifier
+    attr_reader :k
+    def parameter_code(index_converter = nil)
+      raise NotImplementedError
+    end
+    def classify_method_code
+      raise NotImplementedError
+    end
+    def update(vec, label)
+      raise NotImplementedError
+    end
+    def features(i = -1)
+      raise NotImplementedError
+    end
+    def default_iteration
+      raise NotImplementedError
+    end
+  end
+end

data/lib/nekoneko_gen/classifier_factory.rb ADDED Viewed

@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+require File.expand_path(File.join(File.dirname(__FILE__), 'arow'))
+require File.expand_path(File.join(File.dirname(__FILE__), 'pa'))
+require File.expand_path(File.join(File.dirname(__FILE__), 'mlp'))
+module NekonekoGen
+  module ClassifierFactory
+    def self.create(k, options)
+      method = options[:method] || :arow
+      case (method)
+      when :arow
+        Arow.new(k, options)
+      when :pa, :pa1, :pa2
+        PA.new(k, options)
+      when :mlp
+        MLP.new(k, options)
+      else
+        raise ArgumentError
+      end
+    end
+  end
+end

data/lib/nekoneko_gen/linear_classifier.rb ADDED Viewed

@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+require 'json'
+require File.expand_path(File.join(File.dirname(__FILE__), 'classifier'))
+module NekonekoGen
+  class LinearClassifier < Classifier
+    attr_reader :w, :bias
+    def dot(vec, w)
+      dot = 0.0
+      vec.each do |k, v|
+        if (a = w[k])
+          dot += a * v
+        end
+      end
+      dot
+    end
+    def strip!
+      @w.each {|w|
+        w.reject!{|k,v|
+          if (v.abs < Float::EPSILON)
+            # p v
+            true
+          else
+            false
+          end
+        }
+      }
+      @w
+    end
+    def update(vec, label)
+      loss = 0.0
+      if (@k == 2)
+        loss = update_at(0, vec, label)
+      else
+        s = 1.0 / @k
+        @k.times do |i|
+          loss += update_at(i, vec, label) * s
+        end
+      end
+      loss
+    end
+    def features(i = -1)
+      if (i < 0)
+        w.reduce(0){|sum, v| sum + v.size }
+      else
+        w[i].size
+      end
+    end
+    def parameter_code(lang, index_converter = lambda{|i| i})
+      lang ||= :ruby
+      case lang
+      when :ruby
+      else
+        raise NotImplementedError
+      end
+      wvec = self.strip!.map {|w|
+        w.reduce({}) {|h, kv| h[index_converter.call(kv[0])] = kv[1]; h }
+      }
+      <<CODE
+  BIAS = #{self.bias.inspect}
+  W = JSON.load(#{wvec.to_json.inspect})
+CODE
+    end
+    def classify_method_code(lang)
+      lang ||= :ruby
+      case lang
+      when :ruby
+      else
+        raise NotImplementedError
+      end
+      <<CODE
+  def self.classify(vec)
+    if (K == 2)
+      BIAS[0] + W[0].values_at(*vec).compact.reduce(0.0, :+) > 0.0 ? 0 : 1
+    else
+      W.each_with_index.map {|w, i|
+        [BIAS[i] + w.values_at(*vec).compact.reduce(0.0, :+), i]
+      }.max.pop
+    end
+  end
+CODE
+    end
+  end
+end

data/lib/nekoneko_gen/mlp.rb ADDED Viewed

@@ -0,0 +1,176 @@
+require 'json'
+require File.expand_path(File.join(File.dirname(__FILE__), 'classifier'))
+module NekonekoGen
+  # Multi Layer Perceptron
+  class MLP < Classifier
+    IR = 0.4
+    HR = 0.1
+    NOISE_VAR = 0.3
+    MARGIN = 0.2
+    DEFAULT_ITERATION = 40
+    def default_hidden_unit
+      @k
+    end
+    def initialize(k, options)
+      @k = k
+      @output_units = @k == 2 ? 1 : @k
+      @hidden_units = (options[:c] || default_hidden_unit).to_i
+      @input = []
+      @hidden = []
+      @input_bias = []
+      @hidden_bias = []
+      @hidden_units.times do |i|
+        @input[i] = Hash.new {|hash, key| hash[key] = default_value }
+        @input_bias[i] = default_value
+      end
+      @output_units.times do |i|
+        @hidden[i] = []
+        @hidden_units.times do |j|
+          @hidden[i][j] = default_value
+        end
+        @hidden_bias[i] = default_value
+      end
+    end
+    def update(vec, label)
+      input_y = []
+      hidden_y = []
+      output_y = []
+      input_y = @hidden_units.times.map do |i|
+        w = @input[i]
+        sigmoid(@input_bias[i] + vec.map{|k, v| w[k] * v}.reduce(:+) + noise)
+      end
+      hidden_y = @output_units.times.map do |i|
+        @hidden_bias[i] + input_y.zip(@hidden[i]).map{|a, b| a * b }.reduce(:+)
+      end
+      output_y = @output_units.times.map do |i|
+        sigmoid(hidden_y[i])
+      end
+      loss = 0.0
+      dotrain = false
+      if (@output_units == 1)
+        if (output_y[0] > 0.5)
+          l = 0
+        else
+          l = 1
+        end
+        if (label == 0)
+          if (output_y[0] < 1.0 - MARGIN)
+            dotrain = true
+          end
+        else
+          if (output_y[0] > MARGIN)
+            dotrain = true
+          end
+        end
+        loss = (label == l) ? 0.0 : 1.0
+      else
+        max_p, l = output_y.each_with_index.max
+        if (l == label)
+          if (max_p < 1.0 - MARGIN)
+            dotrain = true
+          end
+        else
+          loss = 1.0
+          dotrain = true
+        end
+      end
+      if (dotrain)
+        output_bp = @output_units.times.map do |i|
+          y = hidden_y[i]
+          yt = (label == i) ? 1.0 : 0.0
+          expy = Math.exp(y)
+           -((2.0 * yt - 1.0) * expy + yt) / (Math.exp(2.0 * y) + 2.0 * expy + 1.0)
+        end
+        hidden_bp = @hidden_units.times.map do |j|
+          y = 0.0
+          @output_units.times do |i|
+            y += output_bp[i] * @hidden[i][j]
+          end
+          y * (1.0 - input_y[j]) * input_y[j]
+        end
+        @output_units.times do |j|
+          hidden = @hidden[j]
+          @hidden_units.times do |i|
+            hidden[i] -= HR * input_y[i] * output_bp[j]
+          end
+          @hidden_bias[j] -= HR * output_bp[j]
+        end
+        @hidden_units.times do |i|
+          input = @input[i]
+          vec.each do |k, v|
+            input[k] -= IR * v * hidden_bp[i]
+          end
+          @input_bias[i] -= IR * hidden_bp[i]
+        end
+      end
+      loss
+    end
+    def features(i = -1)
+      @input.map{|v| v.size }.reduce(:+)
+    end
+    def sigmoid(a)
+      1.0 / (1.0 + Math.exp(-a))
+    end
+    def default_value
+      (rand - 0.5)
+    end
+    def noise
+      (Math.sqrt(-2.0 * Math.log(rand)) * Math.sin(2.0 * Math::PI * rand)) * NOISE_VAR
+    end
+    def default_iteration
+      DEFAULT_ITERATION
+    end
+    def parameter_code(lang, index_converter = lambda{|i| i})
+      lang ||= :ruby
+      case lang
+      when :ruby
+      else
+        raise NotImplementedError
+      end
+      wvec = @input.map {|w|
+        w.reduce({}) {|h, kv| h[index_converter.call(kv[0])] = kv[1]; h }
+      }
+      <<CODE
+  HIDDEN_UNITS = #{@hidden_units}
+  INPUT_BIAS = #{@input_bias.inspect}
+  HIDDEN_BIAS = #{@hidden_bias.inspect}
+  INPUT_W = JSON.load(#{wvec.to_json.inspect})
+  HIDDEN_W = #{@hidden.inspect}
+CODE
+    end
+    def classify_method_code(lang)
+      lang ||= :ruby
+      case lang
+      when :ruby
+      else
+        raise NotImplementedError
+      end
+      <<CODE
+  def self.classify(vec)
+    input_y = []
+    output_y = []
+    HIDDEN_UNITS.times do |i|
+      input_y[i] = sigmoid(INPUT_BIAS[i] +
+                           INPUT_W[i].values_at(*vec).compact.reduce(0.0, :+))
+    end
+    if (K == 2)
+      HIDDEN_BIAS[0] +
+        input_y.zip(HIDDEN_W[0]).map{|a, b| a * b }.reduce(:+) > 0.0 ? 0 : 1
+    else
+      K.times.map{|i|
+        [HIDDEN_BIAS[i] + input_y.zip(HIDDEN_W[i]).map{|a, b| a * b }.reduce(:+), i]
+      }.max.pop
+    end
+  end
+  def self.sigmoid(a)
+    1.0 / (1.0 + Math.exp(-a))
+  end
+CODE
+    end
+  end
+end

data/lib/nekoneko_gen/pa.rb ADDED Viewed

@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+require File.expand_path(File.join(File.dirname(__FILE__), 'linear_classifier'))
+module NekonekoGen
+  # Passive Aggressive
+  class PA < LinearClassifier
+    C = 1.0
+    NORM = 2.0 # norm + BIAS
+    DEFAULT_ITERATION = 20
+    def initialize(k, options = {})
+      @k = k
+      @c = options[:c] || C
+      @w = []
+      @bias = []
+      if (@k == 2)
+        @w[0] = Hash.new(0.0)
+        @bias[0] = 0.0
+      else
+        k.times do |i|
+          @w[i] = Hash.new(0.0)
+          @bias[i] = 0.0
+        end
+      end
+      if options[:method]
+        @tau =
+          case options[:method]
+          when :pa
+            lambda{|y, l| pa(y, l)}
+          when :pa1
+            lambda{|y, l| pa1(y, l)}
+          when :pa2
+            lambda{|y, l| pa2(y, l)}
+          else
+            lambda{|y, l| pa2(y, l)}
+          end
+      else
+        @tau = lambda{|y, l| pa2(y, l)}
+      end
+    end
+    def pa2(y, l)
+      y * (l / NORM + 0.5 / @c)
+    end
+    def pa1(y, l)
+      y * [@c, (l / NORM)].min
+    end
+    def pa(y, l)
+      y * l / NORM
+    end
+    def update_at(i, vec, label)
+      y = label == i ? 1 : -1
+      w = @w[i]
+      score = @bias[i] + dot(vec, w)
+      l = 1.0 - score * y
+      if (l > 0.0)
+        alpha = @tau.call(y, l)
+        vec.each do |k, v|
+          w[k] += alpha * v
+        end
+        @bias[i] += alpha
+      end
+      y * score < 0.0 ? 1.0 : 0.0
+    end
+    def default_iteration
+      DEFAULT_ITERATION
+    end
+  end
+end

data/lib/nekoneko_gen/text_classifier_generator.rb CHANGED Viewed

@@ -1,9 +1,8 @@
 # -*- coding: utf-8 -*-
-require 'json'
 require 'nkf'
 require 'bimyou_segmenter'
-require File.expand_path(File.join(File.dirname(__FILE__), 'arow'))
+require File.expand_path(File.join(File.dirname(__FILE__), 'classifier_factory'))
 module NekonekoGen
   class TextClassifierGenerator
@@ -15,16 +14,15 @@ module NekonekoGen
       @files = files
       @word2id = {}
       @id2word = {}
-      @arow = Arow.new(files.size, options)
+      @classifier = ClassifierFactory.create(files.size, options)
       @name = safe_name(@filename).split("_").map(&:capitalize).join
       @labels = files.map {|file| "#{safe_name(file).upcase}"}
     end
-    def train(iteration = 20)
-      iteration ||= 20
+    def train(iteration = nil)
+      iteration ||= @classifier.default_iteration
       data = []
-      @arow.k.times do |i|
+      @classifier.k.times do |i|
         t = Time.now
         data[i] = []
         print "loading #{@files[i]}... "
@@ -49,31 +47,35 @@ module NekonekoGen
         t = Time.now
         print sprintf("step %3d...", step)
-        @arow.k.times.map do |i|
+        @classifier.k.times.map do |i|
           sampling(data[i], samples).map {|vec| [vec, i] }
         end.flatten(1).shuffle!.each do |v|
-          loss += @arow.update(v[0], v[1])
+          loss += @classifier.update(v[0], v[1])
           c += 1
         end
         print sprintf(" %.6f, %.4fs\n", 1.0 - loss / c.to_f, Time.now - t)
       end
-      @arow.strip!
-      if (@arow.k > 2)
-        @arow.w.each_with_index do |w, i|
-          puts "#{@labels[i]} : #{w.size} features"
+      if (@classifier.k > 2)
+        @classifier.k.times do |i|
+          puts "#{@labels[i]} : #{@classifier.features(i)} features"
         end
       else
-        puts "#{@labels[0]}, #{@labels[1]} : #{@arow.w[0].size} features"
+        puts "#{@labels[0]}, #{@labels[1]} : #{@classifier.features(0)} features"
       end
       puts "done nyan! "
     end
-    def generate
-      wv = @arow.w.map {|w|
-        w.reduce({}) {|h, kv| h[id2word(kv[0])] = kv[1]; h }
-      }
+    def generate(lang = :ruby)
+      lang ||= :ruby
+      case lang
+      when :ruby
+        generate_ruby_code
+      else
+        raise NotImplementedError
+      end
+      @name
+    end
+    def generate_ruby_code
       labels = @labels.each_with_index.map{|v, i| "  #{v} = #{i}"}.join("\n")
       File.open(@filename, "w") do |f|
         f.write <<MODEL
 # -*- coding: utf-8 -*-
@@ -82,9 +84,21 @@ require 'json'
 require 'bimyou_segmenter'
 class #{@name}
+  def self.k
+    K
+  end
   def self.predict(text)
+    classify(fv(text))
+  end
+#{labels}
+  LABELS = #{@labels.inspect}
+  K = #{@classifier.k}
+  private
+  def self.fv(text)
     prev = nil
-    vec = BimyouSegmenter.segment(text).map do |word|
+    BimyouSegmenter.segment(text).map do |word|
       if (prev)
         if (NGRAM_TARGET =~ word)
           nword = [prev + word, word]
@@ -101,27 +115,14 @@ class #{@name}
         word
       end
     end.flatten(1)
-    vec << " bias "
-    if (W.size == 1)
-      W[0].values_at(*vec).compact.reduce(:+) > 0.0 ? 0 : 1
-    else
-      W.each_with_index.map {|w,i|
-        [w.values_at(*vec).compact.reduce(:+), i]
-      }.max.pop
-    end
   end
-  def self.k
-    W.size == 1 ? 2 : W.size
-  end
-#{labels}
-  LABELS = #{@labels.inspect}
+#{@classifier.classify_method_code(:ruby)}
-  private
   NGRAM_TARGET = Regexp.new('(^[ァ-ヾ]+$)|(^[a-zA-Z\\-_ａ-ｚＡ-Ｚ‐＿0-9０-９]+$)|' +
                          '(^[々〇ヵヶ' + [0x3400].pack('U') + '-' + [0x9FFF].pack('U') +
                          [0xF900].pack('U') + '-' + [0xFAFF].pack('U') +
-                         [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']+$)')
-  W = JSON.load(#{wv.to_json.inspect})
+                            [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']+$)')
+#{@classifier.parameter_code(:ruby, lambda{|id| id2word(id) })}
 end
 MODEL
       end
@@ -143,8 +144,6 @@ MODEL
     end
     def fv(text)
       vec = Hash.new(0)
-      vec[word2id(" bias ")] = 1
       prev = nil
       words = BimyouSegmenter.segment(text, :white_space => true).map do |word|
         if (prev)
@@ -170,7 +169,7 @@ MODEL
       vec
     end
     def normalize(vec)
-      norm = Math.sqrt(vec.each_value.reduce(0){|a, v| a + v * v })
+      norm = Math.sqrt(vec.values.map{|v| v * v }.reduce(:+))
       if (norm > 0.0)
         s = 1.0 / norm
         vec.each do |k, v|

data/lib/nekoneko_gen/version.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 module NekonekoGen
-  VERSION = "0.1.1"
+  VERSION = "0.2.1"
 end

data/lib/nekoneko_gen.rb CHANGED Viewed

@@ -5,29 +5,51 @@ require 'optparse'
 require 'fileutils'
 module NekonekoGen
-  DEFAULT_ITERATION = 20
   def self.run(argv)
-    iteration = DEFAULT_ITERATION
+    iteration = nil
     rubyfile = nil
     quiet = false
-    $stdout.sync = true
+    $stdout.sync = true
+    method = nil
+    c = nil
     opt = OptionParser.new do |o|
       o.on('-n NAME', 'new classifier name') do |v|
         rubyfile = File.join(File.dirname(v), File.basename(v, ".*") + ".rb")
         FileUtils.touch(rubyfile)
       end
-      o.on('-i N', "iteration count (default: #{DEFAULT_ITERATION})") do |v|
+      o.on('-i N', "iteration (default: auto)") do |v|
         iteration = v.to_i.abs
       end
+      o.on('-m METHOD', "machine learning method [AROW|PA2|MLP] (default AROW)") do |v|
+        if (v)
+          case v.downcase
+          when 'arow'
+            method = :arow
+          when 'pa1'
+            method = :pa1
+          when 'pa2'
+            method = :pa2
+          when 'mlp'
+            method = :mlp
+          else
+            warn opt
+            return -1
+          end
+        else
+          warn opt
+          return -1
+        end
+      end
+      o.on('-p C', "parameter (default AROW::R=6.0, PA2::C=1.0, MLP::HIDDEN_UNIT=K)") do |v|
+        c = v.to_f
+      end
       o.on('-q', "quiet") do
         quiet = true
       end
     end
     opt.version = NekonekoGen::VERSION
-    opt.banner = "Usage: nekoneko_gen -n classifier_name file1 file2 [files...]"
+    opt.banner = "Usage: nekoneko_gen [OPTIONS] -n NAME FILE1 FILE2 [FILES...]"
     files = opt.parse(argv)
     unless (rubyfile)
@@ -45,7 +67,7 @@ module NekonekoGen
       end
     end
-    gen = NekonekoGen::TextClassifierGenerator.new(rubyfile, files)
+    gen = NekonekoGen::TextClassifierGenerator.new(rubyfile, files, {:method => method, :c => c})
     if (quiet)
       gen.quiet = true
     end

data/test/nekoneko_gen_test.rb CHANGED Viewed

@@ -6,118 +6,146 @@ class NekonekoGenTest < Test::Unit::TestCase
     @file0 = File.join(File.dirname(__FILE__), 'class0.txt')
     @file1 = File.join(File.dirname(__FILE__), 'class1.txt')
     @file2 = File.join(File.dirname(__FILE__), 'class2.txt')
-    @output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_classifier.rb")
-    @output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_classifier.rb")
+    @clean_files = []
   end
   def teardown
-    cleanup!
+    @clean_files.each do |file|
+      if (File.exist?(file))
+        File.unlink(file)
+      end
+    end
   end
-  def cleanup!
-    begin
-      File.unlink(@output_file2)
-    rescue
+  def test_mlp
+    gen2('mlp', {:method => :mlp})
+    gen3('mlp', {:method => :mlp})
+  end
+  def test_pa2
+    gen2('pa2', {:method => :pa2})
+    gen3('pa2', {:method => :pa2})
+  end
+  def test_arow
+    gen2('arow', {:method => :arow})
+    gen3('arow',{:method => :arow})
+  end
+  def clean!(a, b)
+    if (File.exist?(a))
+      File.unlink(a)
     end
-    begin
-      File.unlink(@output_file3)
-    rescue
+    if (File.exist?(b))
+      File.unlink(b)
     end
-  end
+  end
-  def test_gen2
-    cleanup!
+  def gen2(prefix, options)
+    p "---- #{prefix} generate 2class"
+    output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_#{prefix}_classifier.rb")
+    output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_#{prefix}_classifier.rb")
-    gen = NekonekoGen::TextClassifierGenerator.new(@output_file2, [@file0, @file1])
-    #gen.quiet = true
-    gen.train(NekonekoGen::DEFAULT_ITERATION)
-    gen.generate
+    clean!(output_file2, output_file3)
+    @clean_files << output_file2
+    @clean_files << output_file3
-    unless (File.exist?(@output_file2))
-      assert_equal "#{@output_file2} not found", nil
+    gen = NekonekoGen::TextClassifierGenerator.new(output_file2, [@file0, @file1], options)
+    gen.train
+    modname = gen.generate
+    unless (File.exist?(output_file2))
+      assert_equal "#{output_file2} not found", nil
     end
     begin
-      load @output_file2
+      load output_file2
+      mod = Kernel.const_get(modname)
       ok = 0
       count = 0
       File.open(@file0) do |f|
         until f.eof?
-          if (NekonekoTest2Classifier.predict(f.readline) == NekonekoTest2Classifier::CLASS0)
+          if (mod.predict(f.readline) == mod::CLASS0)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest2Classifier::LABELS[0]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[0]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
       ok = 0
       count = 0
       File.open(@file1) do |f|
         until f.eof?
-          if (NekonekoTest2Classifier.predict(f.readline) == NekonekoTest2Classifier::CLASS1)
+          if (mod.predict(f.readline) == mod::CLASS1)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest2Classifier::LABELS[1]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[1]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
     end
   end
-  def test_gen3
-    cleanup!
+  def gen3(prefix, options)
+    p "---- #{prefix} generate 3class"
+    output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_#{prefix}_classifier.rb")
+    output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_#{prefix}_classifier.rb")
+    clean!(output_file2, output_file3)
+    @clean_files << output_file2
+    @clean_files << output_file3
-    gen = NekonekoGen::TextClassifierGenerator.new(@output_file3, [@file0, @file1, @file2])
-    #gen.quiet = true
-    gen.train(NekonekoGen::DEFAULT_ITERATION)
-    gen.generate
+    gen = NekonekoGen::TextClassifierGenerator.new(output_file3,
+                                                   [@file0, @file1, @file2], options)
+    gen.train
+    modname = gen.generate
-    unless (File.exist?(@output_file3))
-      assert_equal "#{@output_file3} not found", nil
+    unless (File.exist?(output_file3))
+      assert_equal "#{output_file3} not found", nil
     end
     begin
-      load @output_file3
+      load output_file3
+      mod = Kernel.const_get(modname)
       ok = 0
       count = 0
       File.open(@file0) do |f|
         until f.eof?
-          if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS0)
+          if (mod.predict(f.readline) == mod::CLASS0)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest3Classifier::LABELS[0]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[0]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
       ok = 0
       count = 0
       File.open(@file1) do |f|
         until f.eof?
-          if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS1)
+          if (mod.predict(f.readline) == mod::CLASS1)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest3Classifier::LABELS[1]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[1]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
       ok = 0
       count = 0
       File.open(@file2) do |f|
         until f.eof?
-          if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS2)
+          if (mod.predict(f.readline) == mod::CLASS2)
             ok += 1
           end
           count += 1
         end
       end
-      puts "#{NekonekoTest3Classifier::LABELS[2]}: #{ok.to_f / count}"
+      puts "#{mod::LABELS[2]}: #{ok.to_f / count}"
       assert ok.to_f / count > 0.9
     end
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: nekoneko_gen
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.1
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-29 00:00:00.000000000Z
+date: 2012-06-01 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bimyou_segmenter
-  requirement: &7671220 !ruby/object:Gem::Requirement
+  requirement: &14306440 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *7671220
+  version_requirements: *14306440
 - !ruby/object:Gem::Dependency
   name: json
-  requirement: &7668960 !ruby/object:Gem::Requirement
+  requirement: &14304220 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *7668960
+  version_requirements: *14304220
 - !ruby/object:Gem::Dependency
   name: test-unit
-  requirement: &7641320 !ruby/object:Gem::Requirement
+  requirement: &14303060 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *7641320
+  version_requirements: *14303060
 description: Japanese Text Classifier Generator
 email:
 - nagadomi@nurs.or.jp
@@ -60,6 +60,11 @@ files:
 - bin/nekoneko_gen
 - lib/nekoneko_gen.rb
 - lib/nekoneko_gen/arow.rb
+- lib/nekoneko_gen/classifier.rb
+- lib/nekoneko_gen/classifier_factory.rb
+- lib/nekoneko_gen/linear_classifier.rb
+- lib/nekoneko_gen/mlp.rb
+- lib/nekoneko_gen/pa.rb
 - lib/nekoneko_gen/text_classifier_generator.rb
 - lib/nekoneko_gen/version.rb
 - nekoneko_gen.gemspec