tiny-classifier 2.1 → 2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -0
- data/README.md +0 -0
- data/Rakefile +5 -0
- data/bin/tc-classify +2 -2
- data/bin/tc-generate-classifier +2 -2
- data/bin/tc-retrain +2 -2
- data/bin/tc-train +2 -2
- data/bin/tc-untrain +2 -2
- data/lib/tiny-classifier/category-manager.rb +61 -0
- data/lib/tiny-classifier/command/base.rb +162 -0
- data/lib/tiny-classifier/{classifier.rb → command/classify.rb} +13 -16
- data/lib/tiny-classifier/command/generate-classifier.rb +88 -0
- data/lib/tiny-classifier/command/retrain.rb +75 -0
- data/lib/tiny-classifier/{retrainer.rb → command/train.rb} +17 -21
- data/lib/tiny-classifier/command/untrain.rb +43 -0
- data/lib/tiny-classifier/errors.rb +104 -0
- data/lib/tiny-classifier/{untrainer.rb → input.rb} +12 -14
- data/lib/tiny-classifier/tokenizer.rb +0 -0
- data/tiny-classifier.gemspec +5 -1
- metadata +55 -10
- data/lib/tiny-classifier/base.rb +0 -136
- data/lib/tiny-classifier/classifier-generator.rb +0 -88
- data/lib/tiny-classifier/trainer.rb +0 -74
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e7e246aa9446d56c68ea631b0c2dda4bd0fb506
|
4
|
+
data.tar.gz: 42f9c4ca23ca91a6d2e9e2121d7eb860ded738c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3964e9cc15c6c7a4e6f49a6a7e945f3596c863169c812303293ca1c92a66407a0d96bd6abc3e4a521fa40eca524eece08bb98315521cedfe1fc816e599af80fe
|
7
|
+
data.tar.gz: 0fb9acbac0b1fc12fa72381af63085f7bf2f1aef6674ecfced6ebf6d0645a7ca274d27ae708ce6ea26f85f54fefb66ccd6b1fe2b5e0ff6c3308d30257b489c75
|
data/Gemfile
CHANGED
File without changes
|
data/README.md
CHANGED
File without changes
|
data/Rakefile
CHANGED
data/bin/tc-classify
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/classifier"
|
18
|
+
require "tiny-classifier/command/classify"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::Classify.run
|
data/bin/tc-generate-classifier
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/classifier-generator"
|
18
|
+
require "tiny-classifier/command/generate-classifier"
|
19
19
|
|
20
|
-
TinyClassifier::ClassifierGenerator.run
|
20
|
+
TinyClassifier::Command::GenerateClassifier.run
|
data/bin/tc-retrain
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/retrainer"
|
18
|
+
require "tiny-classifier/command/retrain"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::Retrain.run
|
data/bin/tc-train
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/trainer"
|
18
|
+
require "tiny-classifier/command/train"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::Train.run
|
data/bin/tc-untrain
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/untrainer"
|
18
|
+
require "tiny-classifier/command/untrain"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::Untrain.run
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
module TinyClassifier
|
17
|
+
class CategoryManager
|
18
|
+
attr_reader :chosen
|
19
|
+
|
20
|
+
def initialize(categories)
|
21
|
+
@categories = categories.strip.split(",")
|
22
|
+
normalize_all
|
23
|
+
clanup
|
24
|
+
end
|
25
|
+
|
26
|
+
def all
|
27
|
+
@categories
|
28
|
+
end
|
29
|
+
|
30
|
+
def valid?(category)
|
31
|
+
category = normalize(category)
|
32
|
+
@categories.include?(category)
|
33
|
+
end
|
34
|
+
|
35
|
+
def basename
|
36
|
+
@categories.join("-").downcase
|
37
|
+
end
|
38
|
+
|
39
|
+
def normalize(category)
|
40
|
+
category
|
41
|
+
.downcase
|
42
|
+
.strip
|
43
|
+
.capitalize
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
def normalize_all
|
48
|
+
@categories.collect! do |category|
|
49
|
+
normalize(category)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def clanup
|
54
|
+
@categories.reject! do |category|
|
55
|
+
category.empty?
|
56
|
+
end
|
57
|
+
@categories.uniq!
|
58
|
+
@categories.sort!
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "pathname"
|
17
|
+
require "optparse"
|
18
|
+
require "classifier-reborn"
|
19
|
+
require "tiny-classifier/tokenizer"
|
20
|
+
require "tiny-classifier/category-manager"
|
21
|
+
require "tiny-classifier/input"
|
22
|
+
require "tiny-classifier/errors"
|
23
|
+
|
24
|
+
module TinyClassifier
|
25
|
+
module Command
|
26
|
+
class Base
|
27
|
+
class << self
|
28
|
+
def run(argv=nil)
|
29
|
+
argv ||= ARGV.dup
|
30
|
+
command = new(argv)
|
31
|
+
command.run
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
attr_reader :tokenizer
|
36
|
+
attr_writer :classifier
|
37
|
+
|
38
|
+
def initialize(argv=[])
|
39
|
+
@categories = nil
|
40
|
+
@tokenizer = Tokenizer.new
|
41
|
+
@data_dir = Dir.pwd
|
42
|
+
@verbose = false
|
43
|
+
end
|
44
|
+
|
45
|
+
def run
|
46
|
+
raise NoCategories.new unless @categories
|
47
|
+
end
|
48
|
+
|
49
|
+
def parse_command_line_options(command_line_options)
|
50
|
+
option_parser.parse!(command_line_options)
|
51
|
+
end
|
52
|
+
|
53
|
+
def classifier
|
54
|
+
@classifier ||= prepare_classifier
|
55
|
+
end
|
56
|
+
|
57
|
+
def data_file_name
|
58
|
+
"tc.#{@categories.basename}.dat"
|
59
|
+
end
|
60
|
+
|
61
|
+
def data_file_path
|
62
|
+
@data_file_path ||= prepare_data_file_path
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
def option_parser
|
67
|
+
@option_parser ||= create_option_parser
|
68
|
+
end
|
69
|
+
|
70
|
+
def create_option_parser
|
71
|
+
parser = OptionParser.new
|
72
|
+
|
73
|
+
parser.on("-d PATH", "--data-dir=PATH",
|
74
|
+
"Path to the directory to store training data file (default=current directory)") do |data_dir|
|
75
|
+
@data_dir = data_dir
|
76
|
+
end
|
77
|
+
|
78
|
+
parser.on("-c CATEGORIES", "--categories=CATEGORIES",
|
79
|
+
"List of categories (comma-separated)") do |categories|
|
80
|
+
@categories = CategoryManager.new(categories)
|
81
|
+
end
|
82
|
+
|
83
|
+
parser.on("-t TOKENIZER", "--tokenizer=TOKENIZER",
|
84
|
+
"Tokenizer (default=#{@tokenizer})") do |tokenizer|
|
85
|
+
@tokenizer.type = tokenizer
|
86
|
+
end
|
87
|
+
|
88
|
+
parser.on("-v", "--verbose",
|
89
|
+
"Output internal information (for debugging)") do |verbose|
|
90
|
+
@verbose = verbose
|
91
|
+
end
|
92
|
+
|
93
|
+
parser
|
94
|
+
end
|
95
|
+
|
96
|
+
def prepare_data_file_path
|
97
|
+
path = Pathname(@data_dir)
|
98
|
+
path += data_file_name
|
99
|
+
log("file: #{path}")
|
100
|
+
path
|
101
|
+
end
|
102
|
+
|
103
|
+
def prepare_classifier
|
104
|
+
if data_file_path.exist?
|
105
|
+
data = File.read(data_file_path.to_s)
|
106
|
+
Marshal.load(data)
|
107
|
+
else
|
108
|
+
ClassifierReborn::Bayes.new(*@categories.all)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def save
|
113
|
+
data = Marshal.dump(classifier)
|
114
|
+
File.open(data_file_path, "w") do |file|
|
115
|
+
file.write(data)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def input
|
120
|
+
@input ||= prepare_input
|
121
|
+
end
|
122
|
+
|
123
|
+
def prepare_input
|
124
|
+
input = Input.new
|
125
|
+
raise NoInput.new unless input.given?
|
126
|
+
tokenized = @tokenizer.tokenize(input.read)
|
127
|
+
log("tokenizer: #{@tokenizer.type}")
|
128
|
+
log("tokenized: #{tokenized}")
|
129
|
+
tokenized
|
130
|
+
end
|
131
|
+
|
132
|
+
def prepare_category(category)
|
133
|
+
raise NoCategory.new unless category
|
134
|
+
|
135
|
+
category = @categories.normalize(category)
|
136
|
+
|
137
|
+
unless @categories.valid?(category)
|
138
|
+
raise InvalidCategory.new(category, @categories.all)
|
139
|
+
end
|
140
|
+
category
|
141
|
+
end
|
142
|
+
|
143
|
+
def handle_error(error)
|
144
|
+
case error
|
145
|
+
when TinyClassifierError
|
146
|
+
error(error.message)
|
147
|
+
else
|
148
|
+
error(error.inspect)
|
149
|
+
end
|
150
|
+
false
|
151
|
+
end
|
152
|
+
|
153
|
+
def error(message)
|
154
|
+
$stderr.puts(message)
|
155
|
+
end
|
156
|
+
|
157
|
+
def log(message)
|
158
|
+
$stderr.puts(message) if @verbose
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
@@ -13,29 +13,26 @@
|
|
13
13
|
# You should have received a copy of the GNU General Public License
|
14
14
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
15
|
|
16
|
-
require "tiny-classifier/base"
|
16
|
+
require "tiny-classifier/command/base"
|
17
17
|
|
18
18
|
module TinyClassifier
|
19
|
-
|
20
|
-
class
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
classifier.parse_command_line_options(argv)
|
25
|
-
classifier.run
|
19
|
+
module Command
|
20
|
+
class Classify < Base
|
21
|
+
def initialize(argv=[])
|
22
|
+
super
|
23
|
+
parse_command_line_options(argv)
|
26
24
|
end
|
27
|
-
end
|
28
25
|
|
29
|
-
|
26
|
+
def run
|
27
|
+
super
|
28
|
+
raise NoEffectiveInput.new if input.empty?
|
29
|
+
raise NoTrainingData.new(data_file_path) unless data_file_path.exist?
|
30
30
|
|
31
|
-
def run
|
32
|
-
if input.empty?
|
33
|
-
error("Error: No effective input.")
|
34
|
-
false
|
35
|
-
else
|
36
31
|
category = classifier.classify(input)
|
37
|
-
puts
|
32
|
+
$stdout.puts(category.downcase)
|
38
33
|
true
|
34
|
+
rescue StandardError => error
|
35
|
+
handle_error(error)
|
39
36
|
end
|
40
37
|
end
|
41
38
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "tiny-classifier/command/base"
|
17
|
+
require "fileutils"
|
18
|
+
require "base64"
|
19
|
+
|
20
|
+
module TinyClassifier
|
21
|
+
module Command
|
22
|
+
class GenerateClassifier < Base
|
23
|
+
def initialize(argv=[])
|
24
|
+
super
|
25
|
+
|
26
|
+
@output_dir = Dir.pwd
|
27
|
+
option_parser.on("-o PATH", "--output-dir=PATH",
|
28
|
+
"Path to the classifier command to be saved (default=current directory)") do |output_dir|
|
29
|
+
@output_dir = output_dir
|
30
|
+
end
|
31
|
+
|
32
|
+
parse_command_line_options(argv)
|
33
|
+
end
|
34
|
+
|
35
|
+
def run
|
36
|
+
super
|
37
|
+
unless data_file_path.exist?
|
38
|
+
raise NoTrainingData.new(data_file_path)
|
39
|
+
end
|
40
|
+
unless prepare_output_file_path.parent.exist?
|
41
|
+
raise InvalidOutputDir.new(prepare_output_file_path.parent)
|
42
|
+
end
|
43
|
+
|
44
|
+
FileUtils.mkdir_p(output_file_path.parent)
|
45
|
+
File.open(output_file_path, "w") do |file|
|
46
|
+
file.puts("#!/usr/bin/env ruby")
|
47
|
+
file.puts("require \"base64\"")
|
48
|
+
file.puts("require \"classifier-reborn\"")
|
49
|
+
file.puts("require \"tiny-classifier/command/classify\"")
|
50
|
+
file.puts("classifier_code = Base64.strict_decode64(\"#{encoded_classifier}\")")
|
51
|
+
file.puts("command = TinyClassifier::Command::Classify.new([")
|
52
|
+
file.puts(" \"--categories=#{@categories.all.join(",")}\",")
|
53
|
+
file.puts(" \"--tokenizer=#{@tokenizer.type}\",")
|
54
|
+
file.puts("])")
|
55
|
+
file.puts("command.classifier = Marshal.load(classifier_code)")
|
56
|
+
file.puts("command.run")
|
57
|
+
end
|
58
|
+
FileUtils.chmod("a+x", output_file_path)
|
59
|
+
true
|
60
|
+
rescue StandardError => error
|
61
|
+
handle_error(error)
|
62
|
+
end
|
63
|
+
|
64
|
+
def classifier_name
|
65
|
+
@classifier_name ||= "tc-classify-#{@categories.basename}"
|
66
|
+
end
|
67
|
+
|
68
|
+
def output_file_path
|
69
|
+
@output_file_path ||= prepare_output_file_path
|
70
|
+
end
|
71
|
+
|
72
|
+
private
|
73
|
+
def encoded_classifier
|
74
|
+
@encoded_classifier ||= prepare_encoded_classifier
|
75
|
+
end
|
76
|
+
|
77
|
+
def prepare_encoded_classifier
|
78
|
+
classifier_code = Marshal.dump(classifier)
|
79
|
+
Base64.strict_encode64(classifier_code)
|
80
|
+
end
|
81
|
+
|
82
|
+
def prepare_output_file_path
|
83
|
+
path = Pathname(@output_dir)
|
84
|
+
path + classifier_name
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "tiny-classifier/command/train"
|
17
|
+
|
18
|
+
module TinyClassifier
|
19
|
+
module Command
|
20
|
+
class Retrain < Base
|
21
|
+
def initialize(argv=[])
|
22
|
+
super
|
23
|
+
option_parser.banner += " WRONG CORRECT"
|
24
|
+
*categories = parse_command_line_options(argv)
|
25
|
+
@wrong_category = categories.shift
|
26
|
+
@correct_category = categories.shift
|
27
|
+
end
|
28
|
+
|
29
|
+
def run
|
30
|
+
super
|
31
|
+
prepare_categories
|
32
|
+
raise NoEffectiveInput.new if input.empty?
|
33
|
+
raise NoTrainingData.new(data_file_path) unless data_file_path.exist?
|
34
|
+
|
35
|
+
classifier.untrain(@wrong_category, input)
|
36
|
+
classifier.train(@correct_category, input)
|
37
|
+
save
|
38
|
+
true
|
39
|
+
rescue StandardError => error
|
40
|
+
handle_error(error)
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
def prepare_categories
|
45
|
+
begin
|
46
|
+
@wrong_category = prepare_category(@wrong_category)
|
47
|
+
rescue StandardError => error
|
48
|
+
case error
|
49
|
+
when NoCategory
|
50
|
+
raise NoWrongCategory.new
|
51
|
+
when InvalidCategory
|
52
|
+
raise InvalidWrongCategory.new(@wrong_category, @categories.all)
|
53
|
+
else
|
54
|
+
raise error
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
begin
|
59
|
+
@correct_category = prepare_category(@correct_category)
|
60
|
+
rescue StandardError => error
|
61
|
+
case error
|
62
|
+
when NoCategory
|
63
|
+
raise NoCorrectCategory.new
|
64
|
+
when InvalidCategory
|
65
|
+
raise InvalidCorrectCategory.new(@correct_category, @categories.all)
|
66
|
+
else
|
67
|
+
raise error
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
log("training as: #{@wrong_category} => #{@correct_category}")
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -13,33 +13,29 @@
|
|
13
13
|
# You should have received a copy of the GNU General Public License
|
14
14
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
15
|
|
16
|
-
require "tiny-classifier/
|
16
|
+
require "tiny-classifier/command/base"
|
17
17
|
|
18
18
|
module TinyClassifier
|
19
|
-
|
20
|
-
class
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
*categories =
|
25
|
-
|
26
|
-
correct: categories[1])
|
19
|
+
module Command
|
20
|
+
class Train < Base
|
21
|
+
def initialize(argv=[])
|
22
|
+
super
|
23
|
+
option_parser.banner += " CATEGORY"
|
24
|
+
*categories = parse_command_line_options(argv)
|
25
|
+
@category = categories.first
|
27
26
|
end
|
28
|
-
end
|
29
27
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
classifier.send("untrain_#{@category}", input)
|
38
|
-
@category = params[:correct]
|
39
|
-
prepare_category
|
40
|
-
classifier.send("train_#{@category}", input)
|
28
|
+
def run
|
29
|
+
super
|
30
|
+
@category = prepare_category(@category)
|
31
|
+
log("training as: #{@category}")
|
32
|
+
raise NoEffectiveInput.new if input.empty?
|
33
|
+
|
34
|
+
classifier.train(@category, input)
|
41
35
|
save
|
42
36
|
true
|
37
|
+
rescue StandardError => error
|
38
|
+
handle_error(error)
|
43
39
|
end
|
44
40
|
end
|
45
41
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "tiny-classifier/command/base"
|
17
|
+
|
18
|
+
module TinyClassifier
|
19
|
+
module Command
|
20
|
+
class Untrain < Base
|
21
|
+
def initialize(argv=[])
|
22
|
+
super
|
23
|
+
option_parser.banner += " CATEGORY"
|
24
|
+
*categories = parse_command_line_options(argv)
|
25
|
+
@category = categories.first
|
26
|
+
end
|
27
|
+
|
28
|
+
def run
|
29
|
+
super
|
30
|
+
@category = prepare_category(@category)
|
31
|
+
log("untraining as: #{@category}")
|
32
|
+
raise NoEffectiveInput.new if input.empty?
|
33
|
+
raise NoTrainingData.new(data_file_path) unless data_file_path.exist?
|
34
|
+
|
35
|
+
classifier.untrain(@category, input)
|
36
|
+
save
|
37
|
+
true
|
38
|
+
rescue StandardError => error
|
39
|
+
handle_error(error)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
module TinyClassifier
|
17
|
+
class TinyClassifierError < StandardError
|
18
|
+
end
|
19
|
+
|
20
|
+
class NoInput < TinyClassifierError
|
21
|
+
def message
|
22
|
+
"No input. You need to give any input via the STDIN."
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class NoEffectiveInput < TinyClassifierError
|
27
|
+
def message
|
28
|
+
"No effective input."
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class NoCategories < TinyClassifierError
|
33
|
+
def message
|
34
|
+
"You need to specify categories."
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class NoCategory < TinyClassifierError
|
39
|
+
def message
|
40
|
+
"You need to specify a category for the input."
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class NoWrongCategory < NoCategory
|
45
|
+
def message
|
46
|
+
"You need to specify a category to untrain the input."
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
class NoCorrectCategory < NoCategory
|
51
|
+
def message
|
52
|
+
"You need to specify a category to retrain the input."
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class InvalidCategory < TinyClassifierError
|
57
|
+
attr_reader :category, :categories
|
58
|
+
|
59
|
+
def initialize(category, categories)
|
60
|
+
@category = category
|
61
|
+
@categories = categories
|
62
|
+
end
|
63
|
+
|
64
|
+
def message
|
65
|
+
"You need to specify one of valid categories: #{@categories.join(", ")}"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
class InvalidWrongCategory < InvalidCategory
|
70
|
+
def message
|
71
|
+
"You need to specify one of valid categories to untrain: #{@categories.join(", ")}"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
class InvalidCorrectCategory < InvalidCategory
|
76
|
+
def message
|
77
|
+
"You need to specify one of valid categories to retrain: #{@categories.join(", ")}"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
class NoTrainingData < TinyClassifierError
|
82
|
+
attr_reader :data_dir
|
83
|
+
|
84
|
+
def initialize(data_dir)
|
85
|
+
@data_dir = data_dir
|
86
|
+
end
|
87
|
+
|
88
|
+
def message
|
89
|
+
"There is no training data at #{@data_dir}."
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
class InvalidOutputDir < TinyClassifierError
|
94
|
+
attr_reader :output_dir
|
95
|
+
|
96
|
+
def initialize(output_dir)
|
97
|
+
@output_dir = output_dir
|
98
|
+
end
|
99
|
+
|
100
|
+
def message
|
101
|
+
"#{@output_dir} is not available as the output directory."
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -13,21 +13,19 @@
|
|
13
13
|
# You should have received a copy of the GNU General Public License
|
14
14
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
15
|
|
16
|
-
require "tiny-classifier/trainer"
|
17
|
-
|
18
16
|
module TinyClassifier
|
19
|
-
class
|
20
|
-
def
|
21
|
-
@
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
17
|
+
class Input
|
18
|
+
def initialize(data = nil)
|
19
|
+
@data = data
|
20
|
+
end
|
21
|
+
|
22
|
+
def given?
|
23
|
+
return true if @data or $stdin.is_a?(StringIO)
|
24
|
+
File.pipe?(STDIN)
|
25
|
+
end
|
26
|
+
|
27
|
+
def read
|
28
|
+
@data ||= $stdin.readlines.join(" ").strip
|
31
29
|
end
|
32
30
|
end
|
33
31
|
end
|
File without changes
|
data/tiny-classifier.gemspec
CHANGED
@@ -21,7 +21,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
|
|
21
21
|
|
22
22
|
Gem::Specification.new do |spec|
|
23
23
|
spec.name = "tiny-classifier"
|
24
|
-
spec.version = "2.1"
|
24
|
+
spec.version = "2.2"
|
25
25
|
spec.homepage = "https://github.com/piroor/tiny-classifier"
|
26
26
|
spec.authors = ["YUKI \"Piro\" Hiroshi"]
|
27
27
|
spec.email = ["piro.outsider.reflex@gmail.com"]
|
@@ -39,4 +39,8 @@ Gem::Specification.new do |spec|
|
|
39
39
|
|
40
40
|
spec.add_runtime_dependency("classifier-reborn")
|
41
41
|
spec.add_runtime_dependency("natto")
|
42
|
+
|
43
|
+
spec.add_development_dependency("bundler")
|
44
|
+
spec.add_development_dependency("rake")
|
45
|
+
spec.add_development_dependency("test-unit")
|
42
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '2.1'
|
4
|
+
version: '2.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- YUKI "Piro" Hiroshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: classifier-reborn
|
@@ -38,15 +38,57 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: test-unit
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
41
83
|
description: ''
|
42
84
|
email:
|
43
85
|
- piro.outsider.reflex@gmail.com
|
44
86
|
executables:
|
45
|
-
- tc-classify
|
46
87
|
- tc-generate-classifier
|
88
|
+
- tc-untrain
|
47
89
|
- tc-retrain
|
48
90
|
- tc-train
|
49
|
-
- tc-untrain
|
91
|
+
- tc-classify
|
50
92
|
extensions: []
|
51
93
|
extra_rdoc_files: []
|
52
94
|
files:
|
@@ -58,13 +100,16 @@ files:
|
|
58
100
|
- bin/tc-retrain
|
59
101
|
- bin/tc-train
|
60
102
|
- bin/tc-untrain
|
61
|
-
- lib/tiny-classifier/base.rb
|
62
|
-
- lib/tiny-classifier/classifier-generator.rb
|
63
|
-
- lib/tiny-classifier/classifier.rb
|
64
|
-
- lib/tiny-classifier/retrainer.rb
|
103
|
+
- lib/tiny-classifier/category-manager.rb
|
104
|
+
- lib/tiny-classifier/command/base.rb
|
105
|
+
- lib/tiny-classifier/command/classify.rb
|
106
|
+
- lib/tiny-classifier/command/generate-classifier.rb
|
107
|
+
- lib/tiny-classifier/command/retrain.rb
|
108
|
+
- lib/tiny-classifier/command/train.rb
|
109
|
+
- lib/tiny-classifier/command/untrain.rb
|
110
|
+
- lib/tiny-classifier/errors.rb
|
111
|
+
- lib/tiny-classifier/input.rb
|
65
112
|
- lib/tiny-classifier/tokenizer.rb
|
66
|
-
- lib/tiny-classifier/trainer.rb
|
67
|
-
- lib/tiny-classifier/untrainer.rb
|
68
113
|
- tiny-classifier.gemspec
|
69
114
|
homepage: https://github.com/piroor/tiny-classifier
|
70
115
|
licenses:
|
data/lib/tiny-classifier/base.rb
DELETED
@@ -1,136 +0,0 @@
|
|
1
|
-
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
-
#
|
3
|
-
# This program is free software: you can redistribute it and/or modify
|
4
|
-
# it under the terms of the GNU General Public License as published by
|
5
|
-
# the Free Software Foundation, either version 3 of the License, or
|
6
|
-
# (at your option) any later version.
|
7
|
-
#
|
8
|
-
# This program is distributed in the hope that it will be useful,
|
9
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
-
# GNU General Public License for more details.
|
12
|
-
#
|
13
|
-
# You should have received a copy of the GNU General Public License
|
14
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
-
|
16
|
-
require "pathname"
|
17
|
-
require "optparse"
|
18
|
-
require "classifier-reborn"
|
19
|
-
require "tiny-classifier/tokenizer"
|
20
|
-
|
21
|
-
module TinyClassifier
|
22
|
-
class Base
|
23
|
-
attr_reader :tokenizer
|
24
|
-
|
25
|
-
def initialize
|
26
|
-
@tokenizer = Tokenizer.new
|
27
|
-
@data_dir = Dir.pwd
|
28
|
-
@verbose = false
|
29
|
-
end
|
30
|
-
|
31
|
-
def parse_command_line_options(command_line_options)
|
32
|
-
option_parser.parse!(command_line_options)
|
33
|
-
end
|
34
|
-
|
35
|
-
def classifier
|
36
|
-
@classifier ||= prepare_classifier
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
def option_parser
|
41
|
-
@option_parser ||= create_option_parser
|
42
|
-
end
|
43
|
-
|
44
|
-
def create_option_parser
|
45
|
-
parser = OptionParser.new
|
46
|
-
|
47
|
-
parser.on("-d PATH", "--data-dir=PATH",
|
48
|
-
"Path to the directory to store training data file (default=current directory)") do |data_dir|
|
49
|
-
@data_dir = data_dir
|
50
|
-
end
|
51
|
-
|
52
|
-
parser.on("-c CATEGORIES", "--categories=CATEGORIES",
|
53
|
-
"List of categories (comma-separated)") do |categories|
|
54
|
-
@categories = normalize_categories(categories)
|
55
|
-
log("categories: #{@categories}")
|
56
|
-
end
|
57
|
-
|
58
|
-
parser.on("-t TOKENIZER", "--tokenizer=TOKENIZER",
|
59
|
-
"Tokenizer (default=#{@tokenizer})") do |tokenizer|
|
60
|
-
@tokenizer.type = tokenizer
|
61
|
-
end
|
62
|
-
|
63
|
-
parser.on("-v", "--verbose",
|
64
|
-
"Output internal information (for debugging)") do |verbose|
|
65
|
-
@verbose = verbose
|
66
|
-
end
|
67
|
-
|
68
|
-
parser
|
69
|
-
end
|
70
|
-
|
71
|
-
def normalize_categories(categories)
|
72
|
-
categories
|
73
|
-
.strip
|
74
|
-
.downcase
|
75
|
-
.split(",")
|
76
|
-
.collect(&:strip)
|
77
|
-
.reject do |category|
|
78
|
-
category.empty?
|
79
|
-
end
|
80
|
-
.sort
|
81
|
-
.collect(&:capitalize)
|
82
|
-
end
|
83
|
-
|
84
|
-
def data_file_name
|
85
|
-
@data_file_basename ||= prepare_data_file_name
|
86
|
-
end
|
87
|
-
|
88
|
-
def prepare_data_file_name
|
89
|
-
categories = @categories.join("-").downcase
|
90
|
-
"tc.#{categories}.dat"
|
91
|
-
end
|
92
|
-
|
93
|
-
def data_file_path
|
94
|
-
@data_file_path ||= prepare_data_file_path
|
95
|
-
end
|
96
|
-
|
97
|
-
def prepare_data_file_path
|
98
|
-
path = Pathname(@data_dir)
|
99
|
-
path + data_file_name
|
100
|
-
end
|
101
|
-
|
102
|
-
def prepare_classifier
|
103
|
-
if data_file_path.exist?
|
104
|
-
data = File.read(data_file_path.to_s)
|
105
|
-
Marshal.load(data)
|
106
|
-
else
|
107
|
-
ClassifierReborn::Bayes.new(*@categories)
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def input
|
112
|
-
@input ||= prepare_input
|
113
|
-
end
|
114
|
-
|
115
|
-
def prepare_input
|
116
|
-
unless File.pipe?(STDIN)
|
117
|
-
error("Error: No effective input. You need to give any input via the STDIN.")
|
118
|
-
exit(false)
|
119
|
-
end
|
120
|
-
@input = $stdin.readlines.join(" ")
|
121
|
-
@input = @tokenizer.tokenize(@input)
|
122
|
-
log("tokenizer: #{@tokenizer.type}")
|
123
|
-
@input.strip!
|
124
|
-
log("input: #{@input}")
|
125
|
-
@input
|
126
|
-
end
|
127
|
-
|
128
|
-
def error(message)
|
129
|
-
STDERR.puts(message)
|
130
|
-
end
|
131
|
-
|
132
|
-
def log(message)
|
133
|
-
STDERR.puts(message) if @verbose
|
134
|
-
end
|
135
|
-
end
|
136
|
-
end
|
@@ -1,88 +0,0 @@
|
|
1
|
-
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
-
#
|
3
|
-
# This program is free software: you can redistribute it and/or modify
|
4
|
-
# it under the terms of the GNU General Public License as published by
|
5
|
-
# the Free Software Foundation, either version 3 of the License, or
|
6
|
-
# (at your option) any later version.
|
7
|
-
#
|
8
|
-
# This program is distributed in the hope that it will be useful,
|
9
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
-
# GNU General Public License for more details.
|
12
|
-
#
|
13
|
-
# You should have received a copy of the GNU General Public License
|
14
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
-
|
16
|
-
require "tiny-classifier/base"
|
17
|
-
require "tiny-classifier/classifier"
|
18
|
-
require "fileutils"
|
19
|
-
require "base64"
|
20
|
-
|
21
|
-
module TinyClassifier
|
22
|
-
class ClassifierGenerator < Base
|
23
|
-
class << self
|
24
|
-
def run(argv=nil)
|
25
|
-
argv ||= ARGV.dup
|
26
|
-
generator = new
|
27
|
-
generator.parse_command_line_options(argv)
|
28
|
-
generator.run
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def initialize
|
33
|
-
super
|
34
|
-
@output_dir = Dir.pwd
|
35
|
-
option_parser.on("-o PATH", "--output-dir=PATH",
|
36
|
-
"Path to the classifier command to be saved (default=current directory)") do |output_dir|
|
37
|
-
@output_dir = output_dir
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
def run
|
42
|
-
File.open(output_file_path, "w") do |file|
|
43
|
-
file.puts("#!/usr/bin/env ruby")
|
44
|
-
file.puts("require \"base64\"")
|
45
|
-
file.puts("require \"classifier-reborn\"")
|
46
|
-
file.puts("require \"tiny-classifier/classifier\"")
|
47
|
-
file.puts("classifier_code = Base64.strict_decode64(\"#{encoded_classifier}\")")
|
48
|
-
file.puts("classifier = TinyClassifier::Classifier.new")
|
49
|
-
file.puts("classifier.classifier = Marshal.load(classifier_code)")
|
50
|
-
file.puts("classifier.tokenizer.type = \"#{@tokenizer.type}\"")
|
51
|
-
file.puts("classifier.run")
|
52
|
-
end
|
53
|
-
FileUtils.chmod("a+x", output_file_path)
|
54
|
-
end
|
55
|
-
|
56
|
-
private
|
57
|
-
def encoded_classifier
|
58
|
-
@encoded_classifier ||= prepare_encoded_classifier
|
59
|
-
end
|
60
|
-
|
61
|
-
def prepare_encoded_classifier
|
62
|
-
classifier = Classifier.new
|
63
|
-
classifier.parse_command_line_options(ARGV.dup)
|
64
|
-
FileUtils.mkdir_p(output_file_path.parent)
|
65
|
-
|
66
|
-
classifier_code = Marshal.dump(classifier.classifier)
|
67
|
-
Base64.strict_encode64(classifier_code)
|
68
|
-
end
|
69
|
-
|
70
|
-
def classifier_name
|
71
|
-
@classifier_name ||= prepare_classifier_name
|
72
|
-
end
|
73
|
-
|
74
|
-
def prepare_classifier_name
|
75
|
-
categories = @categories.join("-").downcase
|
76
|
-
"tc-classify-#{categories}"
|
77
|
-
end
|
78
|
-
|
79
|
-
def output_file_path
|
80
|
-
@output_file_path ||= prepare_output_file_path
|
81
|
-
end
|
82
|
-
|
83
|
-
def prepare_output_file_path
|
84
|
-
path = Pathname(@output_dir)
|
85
|
-
path + classifier_name
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
-
#
|
3
|
-
# This program is free software: you can redistribute it and/or modify
|
4
|
-
# it under the terms of the GNU General Public License as published by
|
5
|
-
# the Free Software Foundation, either version 3 of the License, or
|
6
|
-
# (at your option) any later version.
|
7
|
-
#
|
8
|
-
# This program is distributed in the hope that it will be useful,
|
9
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
-
# GNU General Public License for more details.
|
12
|
-
#
|
13
|
-
# You should have received a copy of the GNU General Public License
|
14
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
-
|
16
|
-
require "tiny-classifier/base"
|
17
|
-
|
18
|
-
module TinyClassifier
|
19
|
-
class Trainer < Base
|
20
|
-
class << self
|
21
|
-
def run(argv=nil)
|
22
|
-
argv ||= ARGV.dup
|
23
|
-
trainer = new
|
24
|
-
*categories = trainer.parse_command_line_options(argv)
|
25
|
-
trainer.run(category: categories.first)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
super
|
31
|
-
option_parser.banner += " CATEGORY"
|
32
|
-
end
|
33
|
-
|
34
|
-
def run(params)
|
35
|
-
@category = params[:category]
|
36
|
-
prepare_category
|
37
|
-
if input.empty?
|
38
|
-
error("Error: No effective input.")
|
39
|
-
false
|
40
|
-
else
|
41
|
-
classifier.send("train_#{@category}", input)
|
42
|
-
save
|
43
|
-
true
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
private
|
48
|
-
def prepare_category
|
49
|
-
unless @category
|
50
|
-
error("Error: You need to specify the category for the input.")
|
51
|
-
exit(false)
|
52
|
-
end
|
53
|
-
|
54
|
-
@category = @category.downcase.strip
|
55
|
-
|
56
|
-
if @category.empty?
|
57
|
-
error("Error: You need to specify the category for the input.")
|
58
|
-
exit(false)
|
59
|
-
end
|
60
|
-
|
61
|
-
unless @categories.include?(@category.capitalize)
|
62
|
-
error("Error: You need to specify one of valid categories: #{@categories.join(', ')}")
|
63
|
-
exit(false)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
def save
|
68
|
-
data = Marshal.dump(classifier)
|
69
|
-
File.open(data_file_path, "w") do |file|
|
70
|
-
file.write(data)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|