nekoneko_gen 0.1.1 → 0.2.1

data/lib/nekoneko_gen/arow.rb CHANGED
@@ -1,72 +1,56 @@
  # -*- coding: utf-8 -*-
+ require File.expand_path(File.join(File.dirname(__FILE__), 'linear_classifier'))
+
  module NekonekoGen
- class Arow
+ # Adaptive Regularization of Weight Vector
+ class Arow < LinearClassifier
  R = 6.0
- attr_accessor :k, :w
+ DEFAULT_ITERATION = 20
+
  def initialize(k, options = {})
- @r = options[:r] || R
+ @r = options[:c] || R
  @k = k
  @cov = []
+ @covb = []
  @w = []
+ @bias = []
  if (@k == 2)
  @cov[0] = Hash.new(1.0)
  @w[0] = Hash.new(0.0)
+ @covb[0] = 1.0
+ @bias[0] = 0.0
  else
  k.times do |i|
  @cov[i] = Hash.new(1.0)
  @w[i] = Hash.new(0.0)
+ @covb[i] = 1.0
+ @bias[i] = 0.0
  end
  end
  end
- def update(vec, label)
- loss = 0.0
- if (@k == 2)
- loss = update_at(0, vec, label)
- else
- nega = rand(@k - 1)
- if (nega == label)
- nega += 1
- end
- s = 1.0 / @k
- @k.times do |i|
- loss += update_at(i, vec, label) * s
- end
- end
- loss
- end
- def strip!
- @w.each do |w|
- w.reject!{|k,v| v.abs <= Float::EPSILON }
- end
- @w
- end
-
- private
- def dot(vec, w)
- dot = 0.0
- vec.each do |k, v|
- if (a = w[k])
- dot += a * v
- end
- end
- dot
- end
  def update_at(i, vec, label)
  w = @w[i]
  cov = @cov[i]
+ covb = @covb[i]
+ bias = @bias[i]
  y = label == i ? 1 : -1
- score = dot(vec, w)
+ score = bias + dot(vec, w)
  alpha = 1.0 - y * score
  if (alpha > 0.0)
  r_inv= 1.0 / @r
- var = vec.map {|k, v| cov[k] * v * v }.reduce(:+)
+ var = vec.map{|k, v| cov[k] * v * v }.reduce(:+) + covb
  alpha *= (1.0 / (var + @r)) * y
  vec.each do |k, v|
  w[k] += alpha * cov[k] * v
  cov[k] = 1.0 / ((1.0 / cov[k]) + (v * v * r_inv))
  end
+ @bias[i] += alpha * covb
+ @covb[i] = 1.0 / ((1.0 / covb) + r_inv)
  end
  score * y < 0.0 ? 1.0 : 0.0
  end
+ def default_iteration
+ DEFAULT_ITERATION
+ end
  end
  end
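The hunk above moves the shared plumbing (dot, strip!, update) into the new LinearClassifier base class and adds an explicit bias weight with its own variance (@covb) to the AROW update, so the learner no longer relies on the generator appending a synthetic " bias " feature. A minimal sketch of driving the updated class directly, assuming the gem's lib directory is on the load path and using tiny made-up feature hashes:

    # Minimal sketch (assumed setup): train a binary AROW model on hash feature vectors.
    require 'nekoneko_gen/arow'

    arow = NekonekoGen::Arow.new(2, :c => 6.0)        # two classes; :c overrides R (= 6.0)
    data = [
      [{ "neko" => 1.0, "nyan" => 1.0 }, 0],
      [{ "inu"  => 1.0, "wan"  => 1.0 }, 1],
    ]
    arow.default_iteration.times do
      data.shuffle.each { |vec, label| arow.update(vec, label) }   # update returns a 0/1 loss
    end
    arow.strip!            # drop near-zero weights before code generation
    p arow.w[0]            # learned weight hash for class 0
    p arow.bias[0]         # learned bias term added in this version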
data/lib/nekoneko_gen/classifier.rb ADDED
@@ -0,0 +1,21 @@
+ # -*- coding: utf-8 -*-
+ module NekonekoGen
+ class Classifier
+ attr_reader :k
+ def parameter_code(index_converter = nil)
+ raise NotImplementedError
+ end
+ def classify_method_code
+ raise NotImplementedError
+ end
+ def update(vec, label)
+ raise NotImplementedError
+ end
+ def features(i = -1)
+ raise NotImplementedError
+ end
+ def default_iteration
+ raise NotImplementedError
+ end
+ end
+ end
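classifier.rb fixes the interface that TextClassifierGenerator now codes against: a learner supplies update, features, default_iteration and the two code-generation hooks. As an illustration only (not part of the gem), a hypothetical subclass that satisfies the contract, using the concrete (lang, ...) signatures the generator actually calls:

    # Hypothetical example: a trivial "majority class" learner that honours the
    # Classifier contract used by TextClassifierGenerator.
    module NekonekoGen
      class MajorityClass < Classifier
        def initialize(k, options = {})
          @k = k
          @counts = Array.new(k, 0)
        end
        def update(vec, label)                 # returns a 0/1 loss like the real learners
          predicted = @counts.each_with_index.max.last
          @counts[label] += 1
          predicted == label ? 0.0 : 1.0
        end
        def features(i = -1); 0; end
        def default_iteration; 1; end
        def parameter_code(lang, index_converter = lambda {|i| i })
          "BEST = #{@counts.each_with_index.max.last}\n"
        end
        def classify_method_code(lang)
          "def self.classify(vec)\n  BEST\nend\n"
        end
      end
    end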
data/lib/nekoneko_gen/classifier_factory.rb ADDED
@@ -0,0 +1,22 @@
+ # -*- coding: utf-8 -*-
+ require File.expand_path(File.join(File.dirname(__FILE__), 'arow'))
+ require File.expand_path(File.join(File.dirname(__FILE__), 'pa'))
+ require File.expand_path(File.join(File.dirname(__FILE__), 'mlp'))
+
+ module NekonekoGen
+ module ClassifierFactory
+ def self.create(k, options)
+ method = options[:method] || :arow
+ case (method)
+ when :arow
+ Arow.new(k, options)
+ when :pa, :pa1, :pa2
+ PA.new(k, options)
+ when :mlp
+ MLP.new(k, options)
+ else
+ raise ArgumentError
+ end
+ end
+ end
+ end
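The factory is the single switch point between the command-line -m option and a learner instance; anything outside :arow, :pa/:pa1/:pa2 and :mlp raises ArgumentError. A minimal usage sketch, assuming the gem's lib directory is on the load path:

    # Minimal sketch: the generator builds its learner exactly this way.
    require 'nekoneko_gen/classifier_factory'

    clf = NekonekoGen::ClassifierFactory.create(3, :method => :pa2, :c => 1.0)
    clf.class               # => NekonekoGen::PA
    clf.default_iteration   # => 20

    begin
      NekonekoGen::ClassifierFactory.create(3, :method => :svm)
    rescue ArgumentError
      puts "unknown method"
    end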
data/lib/nekoneko_gen/linear_classifier.rb ADDED
@@ -0,0 +1,86 @@
+ # -*- coding: utf-8 -*-
+ require 'json'
+ require File.expand_path(File.join(File.dirname(__FILE__), 'classifier'))
+
+ module NekonekoGen
+ class LinearClassifier < Classifier
+ attr_reader :w, :bias
+ def dot(vec, w)
+ dot = 0.0
+ vec.each do |k, v|
+ if (a = w[k])
+ dot += a * v
+ end
+ end
+ dot
+ end
+ def strip!
+ @w.each {|w|
+ w.reject!{|k,v|
+ if (v.abs < Float::EPSILON)
+ # p v
+ true
+ else
+ false
+ end
+ }
+ }
+ @w
+ end
+ def update(vec, label)
+ loss = 0.0
+ if (@k == 2)
+ loss = update_at(0, vec, label)
+ else
+ s = 1.0 / @k
+ @k.times do |i|
+ loss += update_at(i, vec, label) * s
+ end
+ end
+ loss
+ end
+ def features(i = -1)
+ if (i < 0)
+ w.reduce(0){|sum, v| sum + v.size }
+ else
+ w[i].size
+ end
+ end
+ def parameter_code(lang, index_converter = lambda{|i| i})
+ lang ||= :ruby
+ case lang
+ when :ruby
+ else
+ raise NotImplementedError
+ end
+
+ wvec = self.strip!.map {|w|
+ w.reduce({}) {|h, kv| h[index_converter.call(kv[0])] = kv[1]; h }
+ }
+ <<CODE
+ BIAS = #{self.bias.inspect}
+ W = JSON.load(#{wvec.to_json.inspect})
+ CODE
+ end
+ def classify_method_code(lang)
+ lang ||= :ruby
+ case lang
+ when :ruby
+ else
+ raise NotImplementedError
+ end
+
+ <<CODE
+ def self.classify(vec)
+ if (K == 2)
+ BIAS[0] + W[0].values_at(*vec).compact.reduce(0.0, :+) > 0.0 ? 0 : 1
+ else
+ W.each_with_index.map {|w, i|
+ [BIAS[i] + w.values_at(*vec).compact.reduce(0.0, :+), i]
+ }.max.pop
+ end
+ end
+ CODE
+ end
+ end
+ end
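In the generated classify method, vec is an array of surface strings, so W[i].values_at(*vec) looks each word up in the weight hash, compact drops the unknown ones, and reduce(0.0, :+) sums what is left before the per-class bias is added. A small worked example with made-up weights:

    # Worked example of the generated scoring code (weights are made up).
    W    = [{ "neko" => 0.75, "nyan" => 0.25 }, { "inu" => 1.0 }]
    BIAS = [0.5, -0.25]

    vec = ["neko", "nyan", "wan"]          # "wan" is unknown -> nil -> removed by compact
    scores = W.each_with_index.map do |w, i|
      [BIAS[i] + w.values_at(*vec).compact.reduce(0.0, :+), i]
    end
    p scores                               # => [[1.5, 0], [-0.25, 1]]
    p scores.max.pop                       # => 0, the predicted class index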
data/lib/nekoneko_gen/mlp.rb ADDED
@@ -0,0 +1,176 @@
+ require 'json'
+ require File.expand_path(File.join(File.dirname(__FILE__), 'classifier'))
+
+ module NekonekoGen
+ # Multi Layer Perceptron
+ class MLP < Classifier
+ IR = 0.4
+ HR = 0.1
+ NOISE_VAR = 0.3
+ MARGIN = 0.2
+ DEFAULT_ITERATION = 40
+
+ def default_hidden_unit
+ @k
+ end
+ def initialize(k, options)
+ @k = k
+ @output_units = @k == 2 ? 1 : @k
+ @hidden_units = (options[:c] || default_hidden_unit).to_i
+ @input = []
+ @hidden = []
+ @input_bias = []
+ @hidden_bias = []
+ @hidden_units.times do |i|
+ @input[i] = Hash.new {|hash, key| hash[key] = default_value }
+ @input_bias[i] = default_value
+ end
+ @output_units.times do |i|
+ @hidden[i] = []
+ @hidden_units.times do |j|
+ @hidden[i][j] = default_value
+ end
+ @hidden_bias[i] = default_value
+ end
+ end
+ def update(vec, label)
+ input_y = []
+ hidden_y = []
+ output_y = []
+
+ input_y = @hidden_units.times.map do |i|
+ w = @input[i]
+ sigmoid(@input_bias[i] + vec.map{|k, v| w[k] * v}.reduce(:+) + noise)
+ end
+ hidden_y = @output_units.times.map do |i|
+ @hidden_bias[i] + input_y.zip(@hidden[i]).map{|a, b| a * b }.reduce(:+)
+ end
+ output_y = @output_units.times.map do |i|
+ sigmoid(hidden_y[i])
+ end
+
+ loss = 0.0
+ dotrain = false
+ if (@output_units == 1)
+ if (output_y[0] > 0.5)
+ l = 0
+ else
+ l = 1
+ end
+ if (label == 0)
+ if (output_y[0] < 1.0 - MARGIN)
+ dotrain = true
+ end
+ else
+ if (output_y[0] > MARGIN)
+ dotrain = true
+ end
+ end
+ loss = (label == l) ? 0.0 : 1.0
+ else
+ max_p, l = output_y.each_with_index.max
+ if (l == label)
+ if (max_p < 1.0 - MARGIN)
+ dotrain = true
+ end
+ else
+ loss = 1.0
+ dotrain = true
+ end
+ end
+ if (dotrain)
+ output_bp = @output_units.times.map do |i|
+ y = hidden_y[i]
+ yt = (label == i) ? 1.0 : 0.0
+ expy = Math.exp(y)
+ -((2.0 * yt - 1.0) * expy + yt) / (Math.exp(2.0 * y) + 2.0 * expy + 1.0)
+ end
+ hidden_bp = @hidden_units.times.map do |j|
+ y = 0.0
+ @output_units.times do |i|
+ y += output_bp[i] * @hidden[i][j]
+ end
+ y * (1.0 - input_y[j]) * input_y[j]
+ end
+ @output_units.times do |j|
+ hidden = @hidden[j]
+ @hidden_units.times do |i|
+ hidden[i] -= HR * input_y[i] * output_bp[j]
+ end
+ @hidden_bias[j] -= HR * output_bp[j]
+ end
+ @hidden_units.times do |i|
+ input = @input[i]
+ vec.each do |k, v|
+ input[k] -= IR * v * hidden_bp[i]
+ end
+ @input_bias[i] -= IR * hidden_bp[i]
+ end
+ end
+ loss
+ end
+ def features(i = -1)
+ @input.map{|v| v.size }.reduce(:+)
+ end
+ def sigmoid(a)
+ 1.0 / (1.0 + Math.exp(-a))
+ end
+ def default_value
+ (rand - 0.5)
+ end
+ def noise
+ (Math.sqrt(-2.0 * Math.log(rand)) * Math.sin(2.0 * Math::PI * rand)) * NOISE_VAR
+ end
+ def default_iteration
+ DEFAULT_ITERATION
+ end
+ def parameter_code(lang, index_converter = lambda{|i| i})
+ lang ||= :ruby
+ case lang
+ when :ruby
+ else
+ raise NotImplementedError
+ end
+
+ wvec = @input.map {|w|
+ w.reduce({}) {|h, kv| h[index_converter.call(kv[0])] = kv[1]; h }
+ }
+ <<CODE
+ HIDDEN_UNITS = #{@hidden_units}
+ INPUT_BIAS = #{@input_bias.inspect}
+ HIDDEN_BIAS = #{@hidden_bias.inspect}
+ INPUT_W = JSON.load(#{wvec.to_json.inspect})
+ HIDDEN_W = #{@hidden.inspect}
+ CODE
+ end
+ def classify_method_code(lang)
+ lang ||= :ruby
+ case lang
+ when :ruby
+ else
+ raise NotImplementedError
+ end
+ <<CODE
+ def self.classify(vec)
+ input_y = []
+ output_y = []
+ HIDDEN_UNITS.times do |i|
+ input_y[i] = sigmoid(INPUT_BIAS[i] +
+ INPUT_W[i].values_at(*vec).compact.reduce(0.0, :+))
+ end
+ if (K == 2)
+ HIDDEN_BIAS[0] +
+ input_y.zip(HIDDEN_W[0]).map{|a, b| a * b }.reduce(:+) > 0.0 ? 0 : 1
+ else
+ K.times.map{|i|
+ [HIDDEN_BIAS[i] + input_y.zip(HIDDEN_W[i]).map{|a, b| a * b }.reduce(:+), i]
+ }.max.pop
+ end
+ end
+ def self.sigmoid(a)
+ 1.0 / (1.0 + Math.exp(-a))
+ end
+ CODE
+ end
+ end
+ end
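MLP#noise is a Box-Muller Gaussian draw scaled by NOISE_VAR, added to the hidden-layer pre-activations during training only; the forward pass itself is a plain one-hidden-layer network with sigmoid units. A minimal sketch of that forward pass with made-up weights (two hidden units, two classes, hence a single output unit):

    # Minimal sketch of the forward pass used in MLP#update and in the generated
    # classify code; all weights below are made up.
    def sigmoid(a) 1.0 / (1.0 + Math.exp(-a)) end

    input_w     = [{ "neko" => 0.5 }, { "inu" => -0.5 }]   # one weight hash per hidden unit
    input_bias  = [0.0, 0.0]
    hidden_w    = [[1.0, -1.0]]                            # one row per output unit
    hidden_bias = [0.0]

    vec = { "neko" => 1.0 }                                # training-time vectors are hashes
    input_y = input_w.each_index.map do |i|
      sigmoid(input_bias[i] + vec.map {|k, v| (input_w[i][k] || 0.0) * v }.reduce(:+))
    end
    output = sigmoid(hidden_bias[0] + input_y.zip(hidden_w[0]).map {|a, b| a * b }.reduce(:+))
    p output > 0.5 ? 0 : 1                                 # class 0 wins above 0.5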
data/lib/nekoneko_gen/pa.rb ADDED
@@ -0,0 +1,68 @@
+ # -*- coding: utf-8 -*-
+ require File.expand_path(File.join(File.dirname(__FILE__), 'linear_classifier'))
+
+ module NekonekoGen
+ # Passive Aggressive
+ class PA < LinearClassifier
+ C = 1.0
+ NORM = 2.0 # norm + BIAS
+ DEFAULT_ITERATION = 20
+
+ def initialize(k, options = {})
+ @k = k
+ @c = options[:c] || C
+ @w = []
+ @bias = []
+ if (@k == 2)
+ @w[0] = Hash.new(0.0)
+ @bias[0] = 0.0
+ else
+ k.times do |i|
+ @w[i] = Hash.new(0.0)
+ @bias[i] = 0.0
+ end
+ end
+ if options[:method]
+ @tau =
+ case options[:method]
+ when :pa
+ lambda{|y, l| pa(y, l)}
+ when :pa1
+ lambda{|y, l| pa1(y, l)}
+ when :pa2
+ lambda{|y, l| pa2(y, l)}
+ else
+ lambda{|y, l| pa2(y, l)}
+ end
+ else
+ @tau = lambda{|y, l| pa2(y, l)}
+ end
+ end
+ def pa2(y, l)
+ y * (l / NORM + 0.5 / @c)
+ end
+ def pa1(y, l)
+ y * [@c, (l / NORM)].min
+ end
+ def pa(y, l)
+ y * l / NORM
+ end
+ def update_at(i, vec, label)
+ y = label == i ? 1 : -1
+ w = @w[i]
+ score = @bias[i] + dot(vec, w)
+ l = 1.0 - score * y
+ if (l > 0.0)
+ alpha = @tau.call(y, l)
+ vec.each do |k, v|
+ w[k] += alpha * v
+ end
+ @bias[i] += alpha
+ end
+ y * score < 0.0 ? 1.0 : 0.0
+ end
+ def default_iteration
+ DEFAULT_ITERATION
+ end
+ end
+ end
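The three @tau lambdas only differ in how aggressively they step: pa divides the hinge loss by the fixed squared norm (NORM = 2.0, the L2-normalized vector plus the bias feature), pa1 additionally caps the step at C, and pa2 adds a 0.5 / C term. A short sketch mirroring the three methods on the same loss:

    # Step sizes of PA#pa, #pa1 and #pa2 for the same hinge loss
    # (NORM = 2.0, C = 1.0, y = +1, loss = 1.5).
    norm, c, y, l = 2.0, 1.0, 1.0, 1.5

    pa  = y * l / norm               # => 0.75
    pa1 = y * [c, l / norm].min      # => 0.75 (capped at C for larger losses)
    pa2 = y * (l / norm + 0.5 / c)   # => 1.25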
data/lib/nekoneko_gen/text_classifier_generator.rb CHANGED
@@ -1,9 +1,8 @@
  # -*- coding: utf-8 -*-
- require 'json'
  require 'nkf'
  require 'bimyou_segmenter'

- require File.expand_path(File.join(File.dirname(__FILE__), 'arow'))
+ require File.expand_path(File.join(File.dirname(__FILE__), 'classifier_factory'))

  module NekonekoGen
  class TextClassifierGenerator
@@ -15,16 +14,15 @@ module NekonekoGen
  @files = files
  @word2id = {}
  @id2word = {}
- @arow = Arow.new(files.size, options)
-
+ @classifier = ClassifierFactory.create(files.size, options)
  @name = safe_name(@filename).split("_").map(&:capitalize).join
  @labels = files.map {|file| "#{safe_name(file).upcase}"}
  end

- def train(iteration = 20)
- iteration ||= 20
+ def train(iteration = nil)
+ iteration ||= @classifier.default_iteration
  data = []
- @arow.k.times do |i|
+ @classifier.k.times do |i|
  t = Time.now
  data[i] = []
  print "loading #{@files[i]}... "
@@ -49,31 +47,35 @@ module NekonekoGen
  t = Time.now
  print sprintf("step %3d...", step)

- @arow.k.times.map do |i|
+ @classifier.k.times.map do |i|
  sampling(data[i], samples).map {|vec| [vec, i] }
  end.flatten(1).shuffle!.each do |v|
- loss += @arow.update(v[0], v[1])
+ loss += @classifier.update(v[0], v[1])
  c += 1
  end
  print sprintf(" %.6f, %.4fs\n", 1.0 - loss / c.to_f, Time.now - t)
  end
- @arow.strip!
-
- if (@arow.k > 2)
- @arow.w.each_with_index do |w, i|
- puts "#{@labels[i]} : #{w.size} features"
+ if (@classifier.k > 2)
+ @classifier.k.times do |i|
+ puts "#{@labels[i]} : #{@classifier.features(i)} features"
  end
  else
- puts "#{@labels[0]}, #{@labels[1]} : #{@arow.w[0].size} features"
+ puts "#{@labels[0]}, #{@labels[1]} : #{@classifier.features(0)} features"
  end
  puts "done nyan! "
  end
- def generate
- wv = @arow.w.map {|w|
- w.reduce({}) {|h, kv| h[id2word(kv[0])] = kv[1]; h }
- }
+ def generate(lang = :ruby)
+ lang ||= :ruby
+ case lang
+ when :ruby
+ generate_ruby_code
+ else
+ raise NotImplementedError
+ end
+ @name
+ end
+ def generate_ruby_code
  labels = @labels.each_with_index.map{|v, i| " #{v} = #{i}"}.join("\n")
-
  File.open(@filename, "w") do |f|
  f.write <<MODEL
  # -*- coding: utf-8 -*-
@@ -82,9 +84,21 @@ require 'json'
  require 'bimyou_segmenter'

  class #{@name}
+ def self.k
+ K
+ end
  def self.predict(text)
+ classify(fv(text))
+ end
+
+ #{labels}
+ LABELS = #{@labels.inspect}
+ K = #{@classifier.k}
+
+ private
+ def self.fv(text)
  prev = nil
- vec = BimyouSegmenter.segment(text).map do |word|
+ BimyouSegmenter.segment(text).map do |word|
  if (prev)
  if (NGRAM_TARGET =~ word)
  nword = [prev + word, word]
@@ -101,27 +115,14 @@ class #{@name}
  word
  end
  end.flatten(1)
- vec << " bias "
- if (W.size == 1)
- W[0].values_at(*vec).compact.reduce(:+) > 0.0 ? 0 : 1
- else
- W.each_with_index.map {|w,i|
- [w.values_at(*vec).compact.reduce(:+), i]
- }.max.pop
- end
  end
- def self.k
- W.size == 1 ? 2 : W.size
- end
- #{labels}
- LABELS = #{@labels.inspect}
+ #{@classifier.classify_method_code(:ruby)}

- private
  NGRAM_TARGET = Regexp.new('(^[ァ-ヾ]+$)|(^[a-zA-Z\\-_a-zA-Z‐_0-90-9]+$)|' +
  '(^[々〇ヵヶ' + [0x3400].pack('U') + '-' + [0x9FFF].pack('U') +
  [0xF900].pack('U') + '-' + [0xFAFF].pack('U') +
- [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']+$)')
- W = JSON.load(#{wv.to_json.inspect})
+ [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']+$)')
+ #{@classifier.parameter_code(:ruby, lambda{|id| id2word(id) })}
  end
  MODEL
  end
@@ -143,8 +144,6 @@ MODEL
  end
  def fv(text)
  vec = Hash.new(0)
- vec[word2id(" bias ")] = 1
-
  prev = nil
  words = BimyouSegmenter.segment(text, :white_space => true).map do |word|
  if (prev)
@@ -170,7 +169,7 @@ MODEL
  vec
  end
  def normalize(vec)
- norm = Math.sqrt(vec.each_value.reduce(0){|a, v| a + v * v })
+ norm = Math.sqrt(vec.values.map{|v| v * v }.reduce(:+))
  if (norm > 0.0)
  s = 1.0 / norm
  vec.each do |k, v|
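With this change the emitted file carries its own fv and classify (supplied by the learner's classify_method_code), plus K, LABELS and one constant per training file, and generate returns the class name. Hypothetical usage of a generated file, where animal_classifier.rb, AnimalClassifier, CAT and DOG are made-up names that would come from running the generator with -n animal_classifier on cat.txt and dog.txt:

    # Hypothetical: every name below depends on how the generator was invoked.
    require './animal_classifier'

    AnimalClassifier.k           # => 2
    AnimalClassifier::LABELS     # => ["CAT", "DOG"]
    label = AnimalClassifier.predict("にゃーにゃー鳴く、かわいい生き物")
    puts AnimalClassifier::LABELS[label]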
data/lib/nekoneko_gen/version.rb CHANGED
@@ -1,4 +1,4 @@
  # -*- coding: utf-8 -*-
  module NekonekoGen
- VERSION = "0.1.1"
+ VERSION = "0.2.1"
  end
data/lib/nekoneko_gen.rb CHANGED
@@ -5,29 +5,51 @@ require 'optparse'
  require 'fileutils'

  module NekonekoGen
- DEFAULT_ITERATION = 20
-
  def self.run(argv)
- iteration = DEFAULT_ITERATION
+ iteration = nil
  rubyfile = nil
  quiet = false
-
- $stdout.sync = true

+ $stdout.sync = true
+ method = nil
+ c = nil
  opt = OptionParser.new do |o|
  o.on('-n NAME', 'new classifier name') do |v|
  rubyfile = File.join(File.dirname(v), File.basename(v, ".*") + ".rb")
  FileUtils.touch(rubyfile)
  end
- o.on('-i N', "iteration count (default: #{DEFAULT_ITERATION})") do |v|
+ o.on('-i N', "iteration (default: auto)") do |v|
  iteration = v.to_i.abs
  end
+ o.on('-m METHOD', "machine learning method [AROW|PA2|MLP] (default AROW)") do |v|
+ if (v)
+ case v.downcase
+ when 'arow'
+ method = :arow
+ when 'pa1'
+ method = :pa1
+ when 'pa2'
+ method = :pa2
+ when 'mlp'
+ method = :mlp
+ else
+ warn opt
+ return -1
+ end
+ else
+ warn opt
+ return -1
+ end
+ end
+ o.on('-p C', "parameter (default AROW::R=6.0, PA2::C=1.0, MLP::HIDDEN_UNIT=K)") do |v|
+ c = v.to_f
+ end
  o.on('-q', "quiet") do
  quiet = true
  end
  end
  opt.version = NekonekoGen::VERSION
- opt.banner = "Usage: nekoneko_gen -n classifier_name file1 file2 [files...]"
+ opt.banner = "Usage: nekoneko_gen [OPTIONS] -n NAME FILE1 FILE2 [FILES...]"
  files = opt.parse(argv)

  unless (rubyfile)
@@ -45,7 +67,7 @@ module NekonekoGen
  end
  end

- gen = NekonekoGen::TextClassifierGenerator.new(rubyfile, files)
+ gen = NekonekoGen::TextClassifierGenerator.new(rubyfile, files, {:method => method, :c => c})
  if (quiet)
  gen.quiet = true
  end
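The parsed -m and -p values travel to the learner through the options hash ({:method => ..., :c => ...}) handed to TextClassifierGenerator. A minimal sketch of calling the same entry point from Ruby; the training file names are hypothetical placeholders:

    # Equivalent to: nekoneko_gen -m pa2 -p 1.0 -i 10 -n spam_classifier spam.txt ham.txt
    # (spam.txt / ham.txt are placeholder file names.)
    require 'nekoneko_gen'
    NekonekoGen.run(%w(-m pa2 -p 1.0 -i 10 -n spam_classifier spam.txt ham.txt))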
@@ -6,118 +6,146 @@ class NekonekoGenTest < Test::Unit::TestCase
  @file0 = File.join(File.dirname(__FILE__), 'class0.txt')
  @file1 = File.join(File.dirname(__FILE__), 'class1.txt')
  @file2 = File.join(File.dirname(__FILE__), 'class2.txt')
- @output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_classifier.rb")
- @output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_classifier.rb")
+ @clean_files = []
  end
  def teardown
- cleanup!
+ @clean_files.each do |file|
+ if (File.exist?(file))
+ File.unlink(file)
+ end
+ end
  end
- def cleanup!
- begin
- File.unlink(@output_file2)
- rescue
+
+ def test_mlp
+ gen2('mlp', {:method => :mlp})
+ gen3('mlp', {:method => :mlp})
+ end
+ def test_pa2
+ gen2('pa2', {:method => :pa2})
+ gen3('pa2', {:method => :pa2})
+ end
+ def test_arow
+ gen2('arow', {:method => :arow})
+ gen3('arow',{:method => :arow})
+ end
+
+ def clean!(a, b)
+ if (File.exist?(a))
+ File.unlink(a)
  end
- begin
- File.unlink(@output_file3)
- rescue
+ if (File.exist?(b))
+ File.unlink(b)
  end
- end
+ end

- def test_gen2
- cleanup!
+ def gen2(prefix, options)
+ p "---- #{prefix} generate 2class"
+ output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_#{prefix}_classifier.rb")
+ output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_#{prefix}_classifier.rb")

- gen = NekonekoGen::TextClassifierGenerator.new(@output_file2, [@file0, @file1])
- #gen.quiet = true
- gen.train(NekonekoGen::DEFAULT_ITERATION)
- gen.generate
+ clean!(output_file2, output_file3)
+ @clean_files << output_file2
+ @clean_files << output_file3

- unless (File.exist?(@output_file2))
- assert_equal "#{@output_file2} not found", nil
+ gen = NekonekoGen::TextClassifierGenerator.new(output_file2, [@file0, @file1], options)
+ gen.train
+ modname = gen.generate
+
+ unless (File.exist?(output_file2))
+ assert_equal "#{output_file2} not found", nil
  end

  begin
- load @output_file2
+ load output_file2

+ mod = Kernel.const_get(modname)
  ok = 0
  count = 0
  File.open(@file0) do |f|
  until f.eof?
- if (NekonekoTest2Classifier.predict(f.readline) == NekonekoTest2Classifier::CLASS0)
+ if (mod.predict(f.readline) == mod::CLASS0)
  ok += 1
  end
  count += 1
  end
  end
- puts "#{NekonekoTest2Classifier::LABELS[0]}: #{ok.to_f / count}"
+ puts "#{mod::LABELS[0]}: #{ok.to_f / count}"
  assert ok.to_f / count > 0.9
-
+
  ok = 0
  count = 0
  File.open(@file1) do |f|
  until f.eof?
- if (NekonekoTest2Classifier.predict(f.readline) == NekonekoTest2Classifier::CLASS1)
+ if (mod.predict(f.readline) == mod::CLASS1)
  ok += 1
  end
  count += 1
  end
  end
- puts "#{NekonekoTest2Classifier::LABELS[1]}: #{ok.to_f / count}"
+ puts "#{mod::LABELS[1]}: #{ok.to_f / count}"
  assert ok.to_f / count > 0.9
  end
  end

- def test_gen3
- cleanup!
+ def gen3(prefix, options)
+ p "---- #{prefix} generate 3class"
+ output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_#{prefix}_classifier.rb")
+ output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_#{prefix}_classifier.rb")
+
+ clean!(output_file2, output_file3)
+ @clean_files << output_file2
+ @clean_files << output_file3

- gen = NekonekoGen::TextClassifierGenerator.new(@output_file3, [@file0, @file1, @file2])
- #gen.quiet = true
- gen.train(NekonekoGen::DEFAULT_ITERATION)
- gen.generate
+ gen = NekonekoGen::TextClassifierGenerator.new(output_file3,
+ [@file0, @file1, @file2], options)
+ gen.train
+ modname = gen.generate

- unless (File.exist?(@output_file3))
- assert_equal "#{@output_file3} not found", nil
+ unless (File.exist?(output_file3))
+ assert_equal "#{output_file3} not found", nil
  end

  begin
- load @output_file3
-
+ load output_file3
+
+ mod = Kernel.const_get(modname)
  ok = 0
  count = 0
  File.open(@file0) do |f|
  until f.eof?
- if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS0)
+ if (mod.predict(f.readline) == mod::CLASS0)
  ok += 1
  end
  count += 1
  end
  end
- puts "#{NekonekoTest3Classifier::LABELS[0]}: #{ok.to_f / count}"
+ puts "#{mod::LABELS[0]}: #{ok.to_f / count}"
  assert ok.to_f / count > 0.9

  ok = 0
  count = 0
  File.open(@file1) do |f|
  until f.eof?
- if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS1)
+ if (mod.predict(f.readline) == mod::CLASS1)
  ok += 1
  end
  count += 1
  end
  end
- puts "#{NekonekoTest3Classifier::LABELS[1]}: #{ok.to_f / count}"
+ puts "#{mod::LABELS[1]}: #{ok.to_f / count}"
  assert ok.to_f / count > 0.9

  ok = 0
  count = 0
  File.open(@file2) do |f|
  until f.eof?
- if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS2)
+ if (mod.predict(f.readline) == mod::CLASS2)
  ok += 1
  end
  count += 1
  end
  end
- puts "#{NekonekoTest3Classifier::LABELS[2]}: #{ok.to_f / count}"
+ puts "#{mod::LABELS[2]}: #{ok.to_f / count}"
  assert ok.to_f / count > 0.9
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: nekoneko_gen
  version: !ruby/object:Gem::Version
- version: 0.1.1
+ version: 0.2.1
  prerelease:
  platform: ruby
  authors:
@@ -9,11 +9,11 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2012-05-29 00:00:00.000000000Z
+ date: 2012-06-01 00:00:00.000000000Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: bimyou_segmenter
- requirement: &7671220 !ruby/object:Gem::Requirement
+ requirement: &14306440 !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
  version: '0'
  type: :runtime
  prerelease: false
- version_requirements: *7671220
+ version_requirements: *14306440
  - !ruby/object:Gem::Dependency
  name: json
- requirement: &7668960 !ruby/object:Gem::Requirement
+ requirement: &14304220 !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
  version: '0'
  type: :runtime
  prerelease: false
- version_requirements: *7668960
+ version_requirements: *14304220
  - !ruby/object:Gem::Dependency
  name: test-unit
- requirement: &7641320 !ruby/object:Gem::Requirement
+ requirement: &14303060 !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
  version: '0'
  type: :development
  prerelease: false
- version_requirements: *7641320
+ version_requirements: *14303060
  description: Japanese Text Classifier Generator
  email:
  - nagadomi@nurs.or.jp
@@ -60,6 +60,11 @@ files:
  - bin/nekoneko_gen
  - lib/nekoneko_gen.rb
  - lib/nekoneko_gen/arow.rb
+ - lib/nekoneko_gen/classifier.rb
+ - lib/nekoneko_gen/classifier_factory.rb
+ - lib/nekoneko_gen/linear_classifier.rb
+ - lib/nekoneko_gen/mlp.rb
+ - lib/nekoneko_gen/pa.rb
  - lib/nekoneko_gen/text_classifier_generator.rb
  - lib/nekoneko_gen/version.rb
  - nekoneko_gen.gemspec