nekoneko_gen 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,72 +1,56 @@
1
1
  # -*- coding: utf-8 -*-
2
+ require File.expand_path(File.join(File.dirname(__FILE__), 'linear_classifier'))
3
+
2
4
  module NekonekoGen
3
- class Arow
5
+ # Adaptive Regularization of Weight Vectors
6
+ class Arow < LinearClassifier
4
7
  R = 6.0
5
- attr_accessor :k, :w
8
+ DEFAULT_ITERATION = 20
9
+
6
10
  def initialize(k, options = {})
7
- @r = options[:r] || R
11
+ @r = options[:c] || R
8
12
  @k = k
9
13
  @cov = []
14
+ @covb = []
10
15
  @w = []
16
+ @bias = []
11
17
  if (@k == 2)
12
18
  @cov[0] = Hash.new(1.0)
13
19
  @w[0] = Hash.new(0.0)
20
+ @covb[0] = 1.0
21
+ @bias[0] = 0.0
14
22
  else
15
23
  k.times do |i|
16
24
  @cov[i] = Hash.new(1.0)
17
25
  @w[i] = Hash.new(0.0)
26
+ @covb[i] = 1.0
27
+ @bias[i] = 0.0
18
28
  end
19
29
  end
20
30
  end
21
- def update(vec, label)
22
- loss = 0.0
23
- if (@k == 2)
24
- loss = update_at(0, vec, label)
25
- else
26
- nega = rand(@k - 1)
27
- if (nega == label)
28
- nega += 1
29
- end
30
- s = 1.0 / @k
31
- @k.times do |i|
32
- loss += update_at(i, vec, label) * s
33
- end
34
- end
35
- loss
36
- end
37
- def strip!
38
- @w.each do |w|
39
- w.reject!{|k,v| v.abs <= Float::EPSILON }
40
- end
41
- @w
42
- end
43
-
44
- private
45
- def dot(vec, w)
46
- dot = 0.0
47
- vec.each do |k, v|
48
- if (a = w[k])
49
- dot += a * v
50
- end
51
- end
52
- dot
53
- end
54
31
  def update_at(i, vec, label)
55
32
  w = @w[i]
56
33
  cov = @cov[i]
34
+ covb = @covb[i]
35
+ bias = @bias[i]
57
36
  y = label == i ? 1 : -1
58
- score = dot(vec, w)
37
+ score = bias + dot(vec, w)
59
38
  alpha = 1.0 - y * score
60
39
  if (alpha > 0.0)
61
40
  r_inv= 1.0 / @r
62
- var = vec.map {|k, v| cov[k] * v * v }.reduce(:+)
41
+ var = vec.map{|k, v| cov[k] * v * v }.reduce(:+) + covb
63
42
  alpha *= (1.0 / (var + @r)) * y
64
43
  vec.each do |k, v|
65
44
  w[k] += alpha * cov[k] * v
66
45
  cov[k] = 1.0 / ((1.0 / cov[k]) + (v * v * r_inv))
67
46
  end
47
+ @bias[i] += alpha * covb
48
+ @covb[i] = 1.0 / ((1.0 / covb) + r_inv)
68
49
  end
69
50
  score * y < 0.0 ? 1.0 : 0.0
70
51
  end
52
+ def default_iteration
53
+ DEFAULT_ITERATION
54
+ end
71
55
  end
72
56
  end
@@ -0,0 +1,21 @@
1
+ # -*- coding: utf-8 -*-
2
+ module NekonekoGen
3
+ class Classifier
4
+ attr_reader :k
5
+ def parameter_code(index_converter = nil)
6
+ raise NotImplementedError
7
+ end
8
+ def classify_method_code
9
+ raise NotImplementedError
10
+ end
11
+ def update(vec, label)
12
+ raise NotImplementedError
13
+ end
14
+ def features(i = -1)
15
+ raise NotImplementedError
16
+ end
17
+ def default_iteration
18
+ raise NotImplementedError
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.expand_path(File.join(File.dirname(__FILE__), 'arow'))
3
+ require File.expand_path(File.join(File.dirname(__FILE__), 'pa'))
4
+ require File.expand_path(File.join(File.dirname(__FILE__), 'mlp'))
5
+
6
+ module NekonekoGen
7
+ module ClassifierFactory
8
+ def self.create(k, options)
9
+ method = options[:method] || :arow
10
+ case (method)
11
+ when :arow
12
+ Arow.new(k, options)
13
+ when :pa, :pa1, :pa2
14
+ PA.new(k, options)
15
+ when :mlp
16
+ MLP.new(k, options)
17
+ else
18
+ raise ArgumentError
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,86 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'json'
3
+ require File.expand_path(File.join(File.dirname(__FILE__), 'classifier'))
4
+
5
+ module NekonekoGen
6
+ class LinearClassifier < Classifier
7
+ attr_reader :w, :bias
8
+ def dot(vec, w)
9
+ dot = 0.0
10
+ vec.each do |k, v|
11
+ if (a = w[k])
12
+ dot += a * v
13
+ end
14
+ end
15
+ dot
16
+ end
17
+ def strip!
18
+ @w.each {|w|
19
+ w.reject!{|k,v|
20
+ if (v.abs < Float::EPSILON)
21
+ # p v
22
+ true
23
+ else
24
+ false
25
+ end
26
+ }
27
+ }
28
+ @w
29
+ end
30
+ def update(vec, label)
31
+ loss = 0.0
32
+ if (@k == 2)
33
+ loss = update_at(0, vec, label)
34
+ else
35
+ s = 1.0 / @k
36
+ @k.times do |i|
37
+ loss += update_at(i, vec, label) * s
38
+ end
39
+ end
40
+ loss
41
+ end
42
+ def features(i = -1)
43
+ if (i < 0)
44
+ w.reduce(0){|sum, v| sum + v.size }
45
+ else
46
+ w[i].size
47
+ end
48
+ end
49
+ def parameter_code(lang, index_converter = lambda{|i| i})
50
+ lang ||= :ruby
51
+ case lang
52
+ when :ruby
53
+ else
54
+ raise NotImplementedError
55
+ end
56
+
57
+ wvec = self.strip!.map {|w|
58
+ w.reduce({}) {|h, kv| h[index_converter.call(kv[0])] = kv[1]; h }
59
+ }
60
+ <<CODE
61
+ BIAS = #{self.bias.inspect}
62
+ W = JSON.load(#{wvec.to_json.inspect})
63
+ CODE
64
+ end
65
+ def classify_method_code(lang)
66
+ lang ||= :ruby
67
+ case lang
68
+ when :ruby
69
+ else
70
+ raise NotImplementedError
71
+ end
72
+
73
+ <<CODE
74
+ def self.classify(vec)
75
+ if (K == 2)
76
+ BIAS[0] + W[0].values_at(*vec).compact.reduce(0.0, :+) > 0.0 ? 0 : 1
77
+ else
78
+ W.each_with_index.map {|w, i|
79
+ [BIAS[i] + w.values_at(*vec).compact.reduce(0.0, :+), i]
80
+ }.max.pop
81
+ end
82
+ end
83
+ CODE
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,176 @@
1
+ require 'json'
2
+ require File.expand_path(File.join(File.dirname(__FILE__), 'classifier'))
3
+
4
+ module NekonekoGen
5
+ # Multi Layer Perceptron
6
+ class MLP < Classifier
7
+ IR = 0.4
8
+ HR = 0.1
9
+ NOISE_VAR = 0.3
10
+ MARGIN = 0.2
11
+ DEFAULT_ITERATION = 40
12
+
13
+ def default_hidden_unit
14
+ @k
15
+ end
16
+ def initialize(k, options)
17
+ @k = k
18
+ @output_units = @k == 2 ? 1 : @k
19
+ @hidden_units = (options[:c] || default_hidden_unit).to_i
20
+ @input = []
21
+ @hidden = []
22
+ @input_bias = []
23
+ @hidden_bias = []
24
+ @hidden_units.times do |i|
25
+ @input[i] = Hash.new {|hash, key| hash[key] = default_value }
26
+ @input_bias[i] = default_value
27
+ end
28
+ @output_units.times do |i|
29
+ @hidden[i] = []
30
+ @hidden_units.times do |j|
31
+ @hidden[i][j] = default_value
32
+ end
33
+ @hidden_bias[i] = default_value
34
+ end
35
+ end
36
+ def update(vec, label)
37
+ input_y = []
38
+ hidden_y = []
39
+ output_y = []
40
+
41
+ input_y = @hidden_units.times.map do |i|
42
+ w = @input[i]
43
+ sigmoid(@input_bias[i] + vec.map{|k, v| w[k] * v}.reduce(:+) + noise)
44
+ end
45
+ hidden_y = @output_units.times.map do |i|
46
+ @hidden_bias[i] + input_y.zip(@hidden[i]).map{|a, b| a * b }.reduce(:+)
47
+ end
48
+ output_y = @output_units.times.map do |i|
49
+ sigmoid(hidden_y[i])
50
+ end
51
+
52
+ loss = 0.0
53
+ dotrain = false
54
+ if (@output_units == 1)
55
+ if (output_y[0] > 0.5)
56
+ l = 0
57
+ else
58
+ l = 1
59
+ end
60
+ if (label == 0)
61
+ if (output_y[0] < 1.0 - MARGIN)
62
+ dotrain = true
63
+ end
64
+ else
65
+ if (output_y[0] > MARGIN)
66
+ dotrain = true
67
+ end
68
+ end
69
+ loss = (label == l) ? 0.0 : 1.0
70
+ else
71
+ max_p, l = output_y.each_with_index.max
72
+ if (l == label)
73
+ if (max_p < 1.0 - MARGIN)
74
+ dotrain = true
75
+ end
76
+ else
77
+ loss = 1.0
78
+ dotrain = true
79
+ end
80
+ end
81
+ if (dotrain)
82
+ output_bp = @output_units.times.map do |i|
83
+ y = hidden_y[i]
84
+ yt = (label == i) ? 1.0 : 0.0
85
+ expy = Math.exp(y)
86
+ -((2.0 * yt - 1.0) * expy + yt) / (Math.exp(2.0 * y) + 2.0 * expy + 1.0)
87
+ end
88
+ hidden_bp = @hidden_units.times.map do |j|
89
+ y = 0.0
90
+ @output_units.times do |i|
91
+ y += output_bp[i] * @hidden[i][j]
92
+ end
93
+ y * (1.0 - input_y[j]) * input_y[j]
94
+ end
95
+ @output_units.times do |j|
96
+ hidden = @hidden[j]
97
+ @hidden_units.times do |i|
98
+ hidden[i] -= HR * input_y[i] * output_bp[j]
99
+ end
100
+ @hidden_bias[j] -= HR * output_bp[j]
101
+ end
102
+ @hidden_units.times do |i|
103
+ input = @input[i]
104
+ vec.each do |k, v|
105
+ input[k] -= IR * v * hidden_bp[i]
106
+ end
107
+ @input_bias[i] -= IR * hidden_bp[i]
108
+ end
109
+ end
110
+ loss
111
+ end
112
+ def features(i = -1)
113
+ @input.map{|v| v.size }.reduce(:+)
114
+ end
115
+ def sigmoid(a)
116
+ 1.0 / (1.0 + Math.exp(-a))
117
+ end
118
+ def default_value
119
+ (rand - 0.5)
120
+ end
121
+ def noise
122
+ (Math.sqrt(-2.0 * Math.log(rand)) * Math.sin(2.0 * Math::PI * rand)) * NOISE_VAR
123
+ end
124
+ def default_iteration
125
+ DEFAULT_ITERATION
126
+ end
127
+ def parameter_code(lang, index_converter = lambda{|i| i})
128
+ lang ||= :ruby
129
+ case lang
130
+ when :ruby
131
+ else
132
+ raise NotImplementedError
133
+ end
134
+
135
+ wvec = @input.map {|w|
136
+ w.reduce({}) {|h, kv| h[index_converter.call(kv[0])] = kv[1]; h }
137
+ }
138
+ <<CODE
139
+ HIDDEN_UNITS = #{@hidden_units}
140
+ INPUT_BIAS = #{@input_bias.inspect}
141
+ HIDDEN_BIAS = #{@hidden_bias.inspect}
142
+ INPUT_W = JSON.load(#{wvec.to_json.inspect})
143
+ HIDDEN_W = #{@hidden.inspect}
144
+ CODE
145
+ end
146
+ def classify_method_code(lang)
147
+ lang ||= :ruby
148
+ case lang
149
+ when :ruby
150
+ else
151
+ raise NotImplementedError
152
+ end
153
+ <<CODE
154
+ def self.classify(vec)
155
+ input_y = []
156
+ output_y = []
157
+ HIDDEN_UNITS.times do |i|
158
+ input_y[i] = sigmoid(INPUT_BIAS[i] +
159
+ INPUT_W[i].values_at(*vec).compact.reduce(0.0, :+))
160
+ end
161
+ if (K == 2)
162
+ HIDDEN_BIAS[0] +
163
+ input_y.zip(HIDDEN_W[0]).map{|a, b| a * b }.reduce(:+) > 0.0 ? 0 : 1
164
+ else
165
+ K.times.map{|i|
166
+ [HIDDEN_BIAS[i] + input_y.zip(HIDDEN_W[i]).map{|a, b| a * b }.reduce(:+), i]
167
+ }.max.pop
168
+ end
169
+ end
170
+ def self.sigmoid(a)
171
+ 1.0 / (1.0 + Math.exp(-a))
172
+ end
173
+ CODE
174
+ end
175
+ end
176
+ end
@@ -0,0 +1,68 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.expand_path(File.join(File.dirname(__FILE__), 'linear_classifier'))
3
+
4
+ module NekonekoGen
5
+ # Passive Aggressive
6
+ class PA < LinearClassifier
7
+ C = 1.0
8
+ NORM = 2.0 # norm + BIAS
9
+ DEFAULT_ITERATION = 20
10
+
11
+ def initialize(k, options = {})
12
+ @k = k
13
+ @c = options[:c] || C
14
+ @w = []
15
+ @bias = []
16
+ if (@k == 2)
17
+ @w[0] = Hash.new(0.0)
18
+ @bias[0] = 0.0
19
+ else
20
+ k.times do |i|
21
+ @w[i] = Hash.new(0.0)
22
+ @bias[i] = 0.0
23
+ end
24
+ end
25
+ if options[:method]
26
+ @tau =
27
+ case options[:method]
28
+ when :pa
29
+ lambda{|y, l| pa(y, l)}
30
+ when :pa1
31
+ lambda{|y, l| pa1(y, l)}
32
+ when :pa2
33
+ lambda{|y, l| pa2(y, l)}
34
+ else
35
+ lambda{|y, l| pa2(y, l)}
36
+ end
37
+ else
38
+ @tau = lambda{|y, l| pa2(y, l)}
39
+ end
40
+ end
41
+ def pa2(y, l)
42
+ y * (l / NORM + 0.5 / @c)
43
+ end
44
+ def pa1(y, l)
45
+ y * [@c, (l / NORM)].min
46
+ end
47
+ def pa(y, l)
48
+ y * l / NORM
49
+ end
50
+ def update_at(i, vec, label)
51
+ y = label == i ? 1 : -1
52
+ w = @w[i]
53
+ score = @bias[i] + dot(vec, w)
54
+ l = 1.0 - score * y
55
+ if (l > 0.0)
56
+ alpha = @tau.call(y, l)
57
+ vec.each do |k, v|
58
+ w[k] += alpha * v
59
+ end
60
+ @bias[i] += alpha
61
+ end
62
+ y * score < 0.0 ? 1.0 : 0.0
63
+ end
64
+ def default_iteration
65
+ DEFAULT_ITERATION
66
+ end
67
+ end
68
+ end
@@ -1,9 +1,8 @@
1
1
  # -*- coding: utf-8 -*-
2
- require 'json'
3
2
  require 'nkf'
4
3
  require 'bimyou_segmenter'
5
4
 
6
- require File.expand_path(File.join(File.dirname(__FILE__), 'arow'))
5
+ require File.expand_path(File.join(File.dirname(__FILE__), 'classifier_factory'))
7
6
 
8
7
  module NekonekoGen
9
8
  class TextClassifierGenerator
@@ -15,16 +14,15 @@ module NekonekoGen
15
14
  @files = files
16
15
  @word2id = {}
17
16
  @id2word = {}
18
- @arow = Arow.new(files.size, options)
19
-
17
+ @classifier = ClassifierFactory.create(files.size, options)
20
18
  @name = safe_name(@filename).split("_").map(&:capitalize).join
21
19
  @labels = files.map {|file| "#{safe_name(file).upcase}"}
22
20
  end
23
21
 
24
- def train(iteration = 20)
25
- iteration ||= 20
22
+ def train(iteration = nil)
23
+ iteration ||= @classifier.default_iteration
26
24
  data = []
27
- @arow.k.times do |i|
25
+ @classifier.k.times do |i|
28
26
  t = Time.now
29
27
  data[i] = []
30
28
  print "loading #{@files[i]}... "
@@ -49,31 +47,35 @@ module NekonekoGen
49
47
  t = Time.now
50
48
  print sprintf("step %3d...", step)
51
49
 
52
- @arow.k.times.map do |i|
50
+ @classifier.k.times.map do |i|
53
51
  sampling(data[i], samples).map {|vec| [vec, i] }
54
52
  end.flatten(1).shuffle!.each do |v|
55
- loss += @arow.update(v[0], v[1])
53
+ loss += @classifier.update(v[0], v[1])
56
54
  c += 1
57
55
  end
58
56
  print sprintf(" %.6f, %.4fs\n", 1.0 - loss / c.to_f, Time.now - t)
59
57
  end
60
- @arow.strip!
61
-
62
- if (@arow.k > 2)
63
- @arow.w.each_with_index do |w, i|
64
- puts "#{@labels[i]} : #{w.size} features"
58
+ if (@classifier.k > 2)
59
+ @classifier.k.times do |i|
60
+ puts "#{@labels[i]} : #{@classifier.features(i)} features"
65
61
  end
66
62
  else
67
- puts "#{@labels[0]}, #{@labels[1]} : #{@arow.w[0].size} features"
63
+ puts "#{@labels[0]}, #{@labels[1]} : #{@classifier.features(0)} features"
68
64
  end
69
65
  puts "done nyan! "
70
66
  end
71
- def generate
72
- wv = @arow.w.map {|w|
73
- w.reduce({}) {|h, kv| h[id2word(kv[0])] = kv[1]; h }
74
- }
67
+ def generate(lang = :ruby)
68
+ lang ||= :ruby
69
+ case lang
70
+ when :ruby
71
+ generate_ruby_code
72
+ else
73
+ raise NotImplementedError
74
+ end
75
+ @name
76
+ end
77
+ def generate_ruby_code
75
78
  labels = @labels.each_with_index.map{|v, i| " #{v} = #{i}"}.join("\n")
76
-
77
79
  File.open(@filename, "w") do |f|
78
80
  f.write <<MODEL
79
81
  # -*- coding: utf-8 -*-
@@ -82,9 +84,21 @@ require 'json'
82
84
  require 'bimyou_segmenter'
83
85
 
84
86
  class #{@name}
87
+ def self.k
88
+ K
89
+ end
85
90
  def self.predict(text)
91
+ classify(fv(text))
92
+ end
93
+
94
+ #{labels}
95
+ LABELS = #{@labels.inspect}
96
+ K = #{@classifier.k}
97
+
98
+ private
99
+ def self.fv(text)
86
100
  prev = nil
87
- vec = BimyouSegmenter.segment(text).map do |word|
101
+ BimyouSegmenter.segment(text).map do |word|
88
102
  if (prev)
89
103
  if (NGRAM_TARGET =~ word)
90
104
  nword = [prev + word, word]
@@ -101,27 +115,14 @@ class #{@name}
101
115
  word
102
116
  end
103
117
  end.flatten(1)
104
- vec << " bias "
105
- if (W.size == 1)
106
- W[0].values_at(*vec).compact.reduce(:+) > 0.0 ? 0 : 1
107
- else
108
- W.each_with_index.map {|w,i|
109
- [w.values_at(*vec).compact.reduce(:+), i]
110
- }.max.pop
111
- end
112
118
  end
113
- def self.k
114
- W.size == 1 ? 2 : W.size
115
- end
116
- #{labels}
117
- LABELS = #{@labels.inspect}
119
+ #{@classifier.classify_method_code(:ruby)}
118
120
 
119
- private
120
121
  NGRAM_TARGET = Regexp.new('(^[ァ-ヾ]+$)|(^[a-zA-Z\\-_a-zA-Z‐_0-90-9]+$)|' +
121
122
  '(^[々〇ヵヶ' + [0x3400].pack('U') + '-' + [0x9FFF].pack('U') +
122
123
  [0xF900].pack('U') + '-' + [0xFAFF].pack('U') +
123
- [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']+$)')
124
- W = JSON.load(#{wv.to_json.inspect})
124
+ [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']+$)')
125
+ #{@classifier.parameter_code(:ruby, lambda{|id| id2word(id) })}
125
126
  end
126
127
  MODEL
127
128
  end
@@ -143,8 +144,6 @@ MODEL
143
144
  end
144
145
  def fv(text)
145
146
  vec = Hash.new(0)
146
- vec[word2id(" bias ")] = 1
147
-
148
147
  prev = nil
149
148
  words = BimyouSegmenter.segment(text, :white_space => true).map do |word|
150
149
  if (prev)
@@ -170,7 +169,7 @@ MODEL
170
169
  vec
171
170
  end
172
171
  def normalize(vec)
173
- norm = Math.sqrt(vec.each_value.reduce(0){|a, v| a + v * v })
172
+ norm = Math.sqrt(vec.values.map{|v| v * v }.reduce(:+))
174
173
  if (norm > 0.0)
175
174
  s = 1.0 / norm
176
175
  vec.each do |k, v|
@@ -1,4 +1,4 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  module NekonekoGen
3
- VERSION = "0.1.1"
3
+ VERSION = "0.2.1"
4
4
  end
data/lib/nekoneko_gen.rb CHANGED
@@ -5,29 +5,51 @@ require 'optparse'
5
5
  require 'fileutils'
6
6
 
7
7
  module NekonekoGen
8
- DEFAULT_ITERATION = 20
9
-
10
8
  def self.run(argv)
11
- iteration = DEFAULT_ITERATION
9
+ iteration = nil
12
10
  rubyfile = nil
13
11
  quiet = false
14
-
15
- $stdout.sync = true
16
12
 
13
+ $stdout.sync = true
14
+ method = nil
15
+ c = nil
17
16
  opt = OptionParser.new do |o|
18
17
  o.on('-n NAME', 'new classifier name') do |v|
19
18
  rubyfile = File.join(File.dirname(v), File.basename(v, ".*") + ".rb")
20
19
  FileUtils.touch(rubyfile)
21
20
  end
22
- o.on('-i N', "iteration count (default: #{DEFAULT_ITERATION})") do |v|
21
+ o.on('-i N', "iteration (default: auto)") do |v|
23
22
  iteration = v.to_i.abs
24
23
  end
24
+ o.on('-m METHOD', "machine learning method [AROW|PA2|MLP] (default AROW)") do |v|
25
+ if (v)
26
+ case v.downcase
27
+ when 'arow'
28
+ method = :arow
29
+ when 'pa1'
30
+ method = :pa1
31
+ when 'pa2'
32
+ method = :pa2
33
+ when 'mlp'
34
+ method = :mlp
35
+ else
36
+ warn opt
37
+ return -1
38
+ end
39
+ else
40
+ warn opt
41
+ return -1
42
+ end
43
+ end
44
+ o.on('-p C', "parameter (default AROW::R=6.0, PA2::C=1.0, MLP::HIDDEN_UNIT=K)") do |v|
45
+ c = v.to_f
46
+ end
25
47
  o.on('-q', "quiet") do
26
48
  quiet = true
27
49
  end
28
50
  end
29
51
  opt.version = NekonekoGen::VERSION
30
- opt.banner = "Usage: nekoneko_gen -n classifier_name file1 file2 [files...]"
52
+ opt.banner = "Usage: nekoneko_gen [OPTIONS] -n NAME FILE1 FILE2 [FILES...]"
31
53
  files = opt.parse(argv)
32
54
 
33
55
  unless (rubyfile)
@@ -45,7 +67,7 @@ module NekonekoGen
45
67
  end
46
68
  end
47
69
 
48
- gen = NekonekoGen::TextClassifierGenerator.new(rubyfile, files)
70
+ gen = NekonekoGen::TextClassifierGenerator.new(rubyfile, files, {:method => method, :c => c})
49
71
  if (quiet)
50
72
  gen.quiet = true
51
73
  end
@@ -6,118 +6,146 @@ class NekonekoGenTest < Test::Unit::TestCase
6
6
  @file0 = File.join(File.dirname(__FILE__), 'class0.txt')
7
7
  @file1 = File.join(File.dirname(__FILE__), 'class1.txt')
8
8
  @file2 = File.join(File.dirname(__FILE__), 'class2.txt')
9
- @output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_classifier.rb")
10
- @output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_classifier.rb")
9
+ @clean_files = []
11
10
  end
12
11
  def teardown
13
- cleanup!
12
+ @clean_files.each do |file|
13
+ if (File.exist?(file))
14
+ File.unlink(file)
15
+ end
16
+ end
14
17
  end
15
- def cleanup!
16
- begin
17
- File.unlink(@output_file2)
18
- rescue
18
+
19
+ def test_mlp
20
+ gen2('mlp', {:method => :mlp})
21
+ gen3('mlp', {:method => :mlp})
22
+ end
23
+ def test_pa2
24
+ gen2('pa2', {:method => :pa2})
25
+ gen3('pa2', {:method => :pa2})
26
+ end
27
+ def test_arow
28
+ gen2('arow', {:method => :arow})
29
+ gen3('arow',{:method => :arow})
30
+ end
31
+
32
+ def clean!(a, b)
33
+ if (File.exist?(a))
34
+ File.unlink(a)
19
35
  end
20
- begin
21
- File.unlink(@output_file3)
22
- rescue
36
+ if (File.exist?(b))
37
+ File.unlink(b)
23
38
  end
24
- end
39
+ end
25
40
 
26
- def test_gen2
27
- cleanup!
41
+ def gen2(prefix, options)
42
+ p "---- #{prefix} generate 2class"
43
+ output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_#{prefix}_classifier.rb")
44
+ output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_#{prefix}_classifier.rb")
28
45
 
29
- gen = NekonekoGen::TextClassifierGenerator.new(@output_file2, [@file0, @file1])
30
- #gen.quiet = true
31
- gen.train(NekonekoGen::DEFAULT_ITERATION)
32
- gen.generate
46
+ clean!(output_file2, output_file3)
47
+ @clean_files << output_file2
48
+ @clean_files << output_file3
33
49
 
34
- unless (File.exist?(@output_file2))
35
- assert_equal "#{@output_file2} not found", nil
50
+ gen = NekonekoGen::TextClassifierGenerator.new(output_file2, [@file0, @file1], options)
51
+ gen.train
52
+ modname = gen.generate
53
+
54
+ unless (File.exist?(output_file2))
55
+ assert_equal "#{output_file2} not found", nil
36
56
  end
37
57
 
38
58
  begin
39
- load @output_file2
59
+ load output_file2
40
60
 
61
+ mod = Kernel.const_get(modname)
41
62
  ok = 0
42
63
  count = 0
43
64
  File.open(@file0) do |f|
44
65
  until f.eof?
45
- if (NekonekoTest2Classifier.predict(f.readline) == NekonekoTest2Classifier::CLASS0)
66
+ if (mod.predict(f.readline) == mod::CLASS0)
46
67
  ok += 1
47
68
  end
48
69
  count += 1
49
70
  end
50
71
  end
51
- puts "#{NekonekoTest2Classifier::LABELS[0]}: #{ok.to_f / count}"
72
+ puts "#{mod::LABELS[0]}: #{ok.to_f / count}"
52
73
  assert ok.to_f / count > 0.9
53
-
74
+
54
75
  ok = 0
55
76
  count = 0
56
77
  File.open(@file1) do |f|
57
78
  until f.eof?
58
- if (NekonekoTest2Classifier.predict(f.readline) == NekonekoTest2Classifier::CLASS1)
79
+ if (mod.predict(f.readline) == mod::CLASS1)
59
80
  ok += 1
60
81
  end
61
82
  count += 1
62
83
  end
63
84
  end
64
- puts "#{NekonekoTest2Classifier::LABELS[1]}: #{ok.to_f / count}"
85
+ puts "#{mod::LABELS[1]}: #{ok.to_f / count}"
65
86
  assert ok.to_f / count > 0.9
66
87
  end
67
88
  end
68
89
 
69
- def test_gen3
70
- cleanup!
90
+ def gen3(prefix, options)
91
+ p "---- #{prefix} generate 3class"
92
+ output_file2 = File.join(Dir.tmpdir, "nekoneko_test2_#{prefix}_classifier.rb")
93
+ output_file3 = File.join(Dir.tmpdir, "nekoneko_test3_#{prefix}_classifier.rb")
94
+
95
+ clean!(output_file2, output_file3)
96
+ @clean_files << output_file2
97
+ @clean_files << output_file3
71
98
 
72
- gen = NekonekoGen::TextClassifierGenerator.new(@output_file3, [@file0, @file1, @file2])
73
- #gen.quiet = true
74
- gen.train(NekonekoGen::DEFAULT_ITERATION)
75
- gen.generate
99
+ gen = NekonekoGen::TextClassifierGenerator.new(output_file3,
100
+ [@file0, @file1, @file2], options)
101
+ gen.train
102
+ modname = gen.generate
76
103
 
77
- unless (File.exist?(@output_file3))
78
- assert_equal "#{@output_file3} not found", nil
104
+ unless (File.exist?(output_file3))
105
+ assert_equal "#{output_file3} not found", nil
79
106
  end
80
107
 
81
108
  begin
82
- load @output_file3
83
-
109
+ load output_file3
110
+
111
+ mod = Kernel.const_get(modname)
84
112
  ok = 0
85
113
  count = 0
86
114
  File.open(@file0) do |f|
87
115
  until f.eof?
88
- if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS0)
116
+ if (mod.predict(f.readline) == mod::CLASS0)
89
117
  ok += 1
90
118
  end
91
119
  count += 1
92
120
  end
93
121
  end
94
- puts "#{NekonekoTest3Classifier::LABELS[0]}: #{ok.to_f / count}"
122
+ puts "#{mod::LABELS[0]}: #{ok.to_f / count}"
95
123
  assert ok.to_f / count > 0.9
96
124
 
97
125
  ok = 0
98
126
  count = 0
99
127
  File.open(@file1) do |f|
100
128
  until f.eof?
101
- if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS1)
129
+ if (mod.predict(f.readline) == mod::CLASS1)
102
130
  ok += 1
103
131
  end
104
132
  count += 1
105
133
  end
106
134
  end
107
- puts "#{NekonekoTest3Classifier::LABELS[1]}: #{ok.to_f / count}"
135
+ puts "#{mod::LABELS[1]}: #{ok.to_f / count}"
108
136
  assert ok.to_f / count > 0.9
109
137
 
110
138
  ok = 0
111
139
  count = 0
112
140
  File.open(@file2) do |f|
113
141
  until f.eof?
114
- if (NekonekoTest3Classifier.predict(f.readline) == NekonekoTest3Classifier::CLASS2)
142
+ if (mod.predict(f.readline) == mod::CLASS2)
115
143
  ok += 1
116
144
  end
117
145
  count += 1
118
146
  end
119
147
  end
120
- puts "#{NekonekoTest3Classifier::LABELS[2]}: #{ok.to_f / count}"
148
+ puts "#{mod::LABELS[2]}: #{ok.to_f / count}"
121
149
  assert ok.to_f / count > 0.9
122
150
  end
123
151
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nekoneko_gen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-29 00:00:00.000000000Z
12
+ date: 2012-06-01 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bimyou_segmenter
16
- requirement: &7671220 !ruby/object:Gem::Requirement
16
+ requirement: &14306440 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *7671220
24
+ version_requirements: *14306440
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: json
27
- requirement: &7668960 !ruby/object:Gem::Requirement
27
+ requirement: &14304220 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *7668960
35
+ version_requirements: *14304220
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: test-unit
38
- requirement: &7641320 !ruby/object:Gem::Requirement
38
+ requirement: &14303060 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *7641320
46
+ version_requirements: *14303060
47
47
  description: Japanese Text Classifier Generator
48
48
  email:
49
49
  - nagadomi@nurs.or.jp
@@ -60,6 +60,11 @@ files:
60
60
  - bin/nekoneko_gen
61
61
  - lib/nekoneko_gen.rb
62
62
  - lib/nekoneko_gen/arow.rb
63
+ - lib/nekoneko_gen/classifier.rb
64
+ - lib/nekoneko_gen/classifier_factory.rb
65
+ - lib/nekoneko_gen/linear_classifier.rb
66
+ - lib/nekoneko_gen/mlp.rb
67
+ - lib/nekoneko_gen/pa.rb
63
68
  - lib/nekoneko_gen/text_classifier_generator.rb
64
69
  - lib/nekoneko_gen/version.rb
65
70
  - nekoneko_gen.gemspec