tiny-classifier 2.1 → 2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -0
- data/README.md +0 -0
- data/Rakefile +5 -0
- data/bin/tc-classify +2 -2
- data/bin/tc-generate-classifier +2 -2
- data/bin/tc-retrain +2 -2
- data/bin/tc-train +2 -2
- data/bin/tc-untrain +2 -2
- data/lib/tiny-classifier/category-manager.rb +61 -0
- data/lib/tiny-classifier/command/base.rb +162 -0
- data/lib/tiny-classifier/{classifier.rb → command/classify.rb} +13 -16
- data/lib/tiny-classifier/command/generate-classifier.rb +88 -0
- data/lib/tiny-classifier/command/retrain.rb +75 -0
- data/lib/tiny-classifier/{retrainer.rb → command/train.rb} +17 -21
- data/lib/tiny-classifier/command/untrain.rb +43 -0
- data/lib/tiny-classifier/errors.rb +104 -0
- data/lib/tiny-classifier/{untrainer.rb → input.rb} +12 -14
- data/lib/tiny-classifier/tokenizer.rb +0 -0
- data/tiny-classifier.gemspec +5 -1
- metadata +55 -10
- data/lib/tiny-classifier/base.rb +0 -136
- data/lib/tiny-classifier/classifier-generator.rb +0 -88
- data/lib/tiny-classifier/trainer.rb +0 -74
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e7e246aa9446d56c68ea631b0c2dda4bd0fb506
|
4
|
+
data.tar.gz: 42f9c4ca23ca91a6d2e9e2121d7eb860ded738c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3964e9cc15c6c7a4e6f49a6a7e945f3596c863169c812303293ca1c92a66407a0d96bd6abc3e4a521fa40eca524eece08bb98315521cedfe1fc816e599af80fe
|
7
|
+
data.tar.gz: 0fb9acbac0b1fc12fa72381af63085f7bf2f1aef6674ecfced6ebf6d0645a7ca274d27ae708ce6ea26f85f54fefb66ccd6b1fe2b5e0ff6c3308d30257b489c75
|
data/Gemfile
CHANGED
File without changes
|
data/README.md
CHANGED
File without changes
|
data/Rakefile
CHANGED
data/bin/tc-classify
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/
|
18
|
+
require "tiny-classifier/command/classify"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::Classify.run
|
data/bin/tc-generate-classifier
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/classifier
|
18
|
+
require "tiny-classifier/command/generate-classifier"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::GenerateClassifier.run
|
data/bin/tc-retrain
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/
|
18
|
+
require "tiny-classifier/command/retrain"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::Retrain.run
|
data/bin/tc-train
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/
|
18
|
+
require "tiny-classifier/command/train"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::Train.run
|
data/bin/tc-untrain
CHANGED
@@ -15,6 +15,6 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
require "tiny-classifier/
|
18
|
+
require "tiny-classifier/command/untrain"
|
19
19
|
|
20
|
-
TinyClassifier::
|
20
|
+
TinyClassifier::Command::Untrain.run
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
module TinyClassifier
|
17
|
+
class CategoryManager
|
18
|
+
attr_reader :chosen
|
19
|
+
|
20
|
+
def initialize(categories)
|
21
|
+
@categories = categories.strip.split(",")
|
22
|
+
normalize_all
|
23
|
+
clanup
|
24
|
+
end
|
25
|
+
|
26
|
+
def all
|
27
|
+
@categories
|
28
|
+
end
|
29
|
+
|
30
|
+
def valid?(category)
|
31
|
+
category = normalize(category)
|
32
|
+
@categories.include?(category)
|
33
|
+
end
|
34
|
+
|
35
|
+
def basename
|
36
|
+
@categories.join("-").downcase
|
37
|
+
end
|
38
|
+
|
39
|
+
def normalize(category)
|
40
|
+
category
|
41
|
+
.downcase
|
42
|
+
.strip
|
43
|
+
.capitalize
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
def normalize_all
|
48
|
+
@categories.collect! do |category|
|
49
|
+
normalize(category)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def clanup
|
54
|
+
@categories.reject! do |category|
|
55
|
+
category.empty?
|
56
|
+
end
|
57
|
+
@categories.uniq!
|
58
|
+
@categories.sort!
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "pathname"
|
17
|
+
require "optparse"
|
18
|
+
require "classifier-reborn"
|
19
|
+
require "tiny-classifier/tokenizer"
|
20
|
+
require "tiny-classifier/category-manager"
|
21
|
+
require "tiny-classifier/input"
|
22
|
+
require "tiny-classifier/errors"
|
23
|
+
|
24
|
+
module TinyClassifier
|
25
|
+
module Command
|
26
|
+
class Base
|
27
|
+
class << self
|
28
|
+
def run(argv=nil)
|
29
|
+
argv ||= ARGV.dup
|
30
|
+
command = new(argv)
|
31
|
+
command.run
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
attr_reader :tokenizer
|
36
|
+
attr_writer :classifier
|
37
|
+
|
38
|
+
def initialize(argv=[])
|
39
|
+
@categories = nil
|
40
|
+
@tokenizer = Tokenizer.new
|
41
|
+
@data_dir = Dir.pwd
|
42
|
+
@verbose = false
|
43
|
+
end
|
44
|
+
|
45
|
+
def run
|
46
|
+
raise NoCategories.new unless @categories
|
47
|
+
end
|
48
|
+
|
49
|
+
def parse_command_line_options(command_line_options)
|
50
|
+
option_parser.parse!(command_line_options)
|
51
|
+
end
|
52
|
+
|
53
|
+
def classifier
|
54
|
+
@classifier ||= prepare_classifier
|
55
|
+
end
|
56
|
+
|
57
|
+
def data_file_name
|
58
|
+
"tc.#{@categories.basename}.dat"
|
59
|
+
end
|
60
|
+
|
61
|
+
def data_file_path
|
62
|
+
@data_file_path ||= prepare_data_file_path
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
def option_parser
|
67
|
+
@option_parser ||= create_option_parser
|
68
|
+
end
|
69
|
+
|
70
|
+
def create_option_parser
|
71
|
+
parser = OptionParser.new
|
72
|
+
|
73
|
+
parser.on("-d PATH", "--data-dir=PATH",
|
74
|
+
"Path to the directory to store training data file (default=current directory)") do |data_dir|
|
75
|
+
@data_dir = data_dir
|
76
|
+
end
|
77
|
+
|
78
|
+
parser.on("-c CATEGORIES", "--categories=CATEGORIES",
|
79
|
+
"List of categories (comma-separated)") do |categories|
|
80
|
+
@categories = CategoryManager.new(categories)
|
81
|
+
end
|
82
|
+
|
83
|
+
parser.on("-t TOKENIZER", "--tokenizer=TOKENIZER",
|
84
|
+
"Tokenizer (default=#{@tokenizer})") do |tokenizer|
|
85
|
+
@tokenizer.type = tokenizer
|
86
|
+
end
|
87
|
+
|
88
|
+
parser.on("-v", "--verbose",
|
89
|
+
"Output internal information (for debugging)") do |verbose|
|
90
|
+
@verbose = verbose
|
91
|
+
end
|
92
|
+
|
93
|
+
parser
|
94
|
+
end
|
95
|
+
|
96
|
+
def prepare_data_file_path
|
97
|
+
path = Pathname(@data_dir)
|
98
|
+
path += data_file_name
|
99
|
+
log("file: #{path}")
|
100
|
+
path
|
101
|
+
end
|
102
|
+
|
103
|
+
def prepare_classifier
|
104
|
+
if data_file_path.exist?
|
105
|
+
data = File.read(data_file_path.to_s)
|
106
|
+
Marshal.load(data)
|
107
|
+
else
|
108
|
+
ClassifierReborn::Bayes.new(*@categories.all)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def save
|
113
|
+
data = Marshal.dump(classifier)
|
114
|
+
File.open(data_file_path, "w") do |file|
|
115
|
+
file.write(data)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def input
|
120
|
+
@input ||= prepare_input
|
121
|
+
end
|
122
|
+
|
123
|
+
def prepare_input
|
124
|
+
input = Input.new
|
125
|
+
raise NoInput.new unless input.given?
|
126
|
+
tokenized = @tokenizer.tokenize(input.read)
|
127
|
+
log("tokenizer: #{@tokenizer.type}")
|
128
|
+
log("tokenized: #{tokenized}")
|
129
|
+
tokenized
|
130
|
+
end
|
131
|
+
|
132
|
+
def prepare_category(category)
|
133
|
+
raise NoCategory.new unless category
|
134
|
+
|
135
|
+
category = @categories.normalize(category)
|
136
|
+
|
137
|
+
unless @categories.valid?(category)
|
138
|
+
raise InvalidCategory.new(category, @categories.all)
|
139
|
+
end
|
140
|
+
category
|
141
|
+
end
|
142
|
+
|
143
|
+
def handle_error(error)
|
144
|
+
case error
|
145
|
+
when TinyClassifierError
|
146
|
+
error(error.message)
|
147
|
+
else
|
148
|
+
error(error.inspect)
|
149
|
+
end
|
150
|
+
false
|
151
|
+
end
|
152
|
+
|
153
|
+
def error(message)
|
154
|
+
$stderr.puts(message)
|
155
|
+
end
|
156
|
+
|
157
|
+
def log(message)
|
158
|
+
$stderr.puts(message) if @verbose
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
@@ -13,29 +13,26 @@
|
|
13
13
|
# You should have received a copy of the GNU General Public License
|
14
14
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
15
|
|
16
|
-
require "tiny-classifier/base"
|
16
|
+
require "tiny-classifier/command/base"
|
17
17
|
|
18
18
|
module TinyClassifier
|
19
|
-
|
20
|
-
class
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
classifier.parse_command_line_options(argv)
|
25
|
-
classifier.run
|
19
|
+
module Command
|
20
|
+
class Classify < Base
|
21
|
+
def initialize(argv=[])
|
22
|
+
super
|
23
|
+
parse_command_line_options(argv)
|
26
24
|
end
|
27
|
-
end
|
28
25
|
|
29
|
-
|
26
|
+
def run
|
27
|
+
super
|
28
|
+
raise NoEffectiveInput.new if input.empty?
|
29
|
+
raise NoTrainingData.new(data_file_path) unless data_file_path.exist?
|
30
30
|
|
31
|
-
def run
|
32
|
-
if input.empty?
|
33
|
-
error("Error: No effective input.")
|
34
|
-
false
|
35
|
-
else
|
36
31
|
category = classifier.classify(input)
|
37
|
-
puts
|
32
|
+
$stdout.puts(category.downcase)
|
38
33
|
true
|
34
|
+
rescue StandardError => error
|
35
|
+
handle_error(error)
|
39
36
|
end
|
40
37
|
end
|
41
38
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "tiny-classifier/command/base"
|
17
|
+
require "fileutils"
|
18
|
+
require "base64"
|
19
|
+
|
20
|
+
module TinyClassifier
|
21
|
+
module Command
|
22
|
+
class GenerateClassifier < Base
|
23
|
+
def initialize(argv=[])
|
24
|
+
super
|
25
|
+
|
26
|
+
@output_dir = Dir.pwd
|
27
|
+
option_parser.on("-o PATH", "--output-dir=PATH",
|
28
|
+
"Path to the classifier command to be saved (default=current directory)") do |output_dir|
|
29
|
+
@output_dir = output_dir
|
30
|
+
end
|
31
|
+
|
32
|
+
parse_command_line_options(argv)
|
33
|
+
end
|
34
|
+
|
35
|
+
def run
|
36
|
+
super
|
37
|
+
unless data_file_path.exist?
|
38
|
+
raise NoTrainingData.new(data_file_path)
|
39
|
+
end
|
40
|
+
unless prepare_output_file_path.parent.exist?
|
41
|
+
raise InvalidOutputDir.new(prepare_output_file_path.parent)
|
42
|
+
end
|
43
|
+
|
44
|
+
FileUtils.mkdir_p(output_file_path.parent)
|
45
|
+
File.open(output_file_path, "w") do |file|
|
46
|
+
file.puts("#!/usr/bin/env ruby")
|
47
|
+
file.puts("require \"base64\"")
|
48
|
+
file.puts("require \"classifier-reborn\"")
|
49
|
+
file.puts("require \"tiny-classifier/command/classify\"")
|
50
|
+
file.puts("classifier_code = Base64.strict_decode64(\"#{encoded_classifier}\")")
|
51
|
+
file.puts("command = TinyClassifier::Command::Classify.new([")
|
52
|
+
file.puts(" \"--categories=#{@categories.all.join(",")}\",")
|
53
|
+
file.puts(" \"--tokenizer=#{@tokenizer.type}\",")
|
54
|
+
file.puts("])")
|
55
|
+
file.puts("command.classifier = Marshal.load(classifier_code)")
|
56
|
+
file.puts("command.run")
|
57
|
+
end
|
58
|
+
FileUtils.chmod("a+x", output_file_path)
|
59
|
+
true
|
60
|
+
rescue StandardError => error
|
61
|
+
handle_error(error)
|
62
|
+
end
|
63
|
+
|
64
|
+
def classifier_name
|
65
|
+
@classifier_name ||= "tc-classify-#{@categories.basename}"
|
66
|
+
end
|
67
|
+
|
68
|
+
def output_file_path
|
69
|
+
@output_file_path ||= prepare_output_file_path
|
70
|
+
end
|
71
|
+
|
72
|
+
private
|
73
|
+
def encoded_classifier
|
74
|
+
@encoded_classifier ||= prepare_encoded_classifier
|
75
|
+
end
|
76
|
+
|
77
|
+
def prepare_encoded_classifier
|
78
|
+
classifier_code = Marshal.dump(classifier)
|
79
|
+
Base64.strict_encode64(classifier_code)
|
80
|
+
end
|
81
|
+
|
82
|
+
def prepare_output_file_path
|
83
|
+
path = Pathname(@output_dir)
|
84
|
+
path + classifier_name
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "tiny-classifier/command/train"
|
17
|
+
|
18
|
+
module TinyClassifier
|
19
|
+
module Command
|
20
|
+
class Retrain < Base
|
21
|
+
def initialize(argv=[])
|
22
|
+
super
|
23
|
+
option_parser.banner += " WRONG CORRECT"
|
24
|
+
*categories = parse_command_line_options(argv)
|
25
|
+
@wrong_category = categories.shift
|
26
|
+
@correct_category = categories.shift
|
27
|
+
end
|
28
|
+
|
29
|
+
def run
|
30
|
+
super
|
31
|
+
prepare_categories
|
32
|
+
raise NoEffectiveInput.new if input.empty?
|
33
|
+
raise NoTrainingData.new(data_file_path) unless data_file_path.exist?
|
34
|
+
|
35
|
+
classifier.untrain(@wrong_category, input)
|
36
|
+
classifier.train(@correct_category, input)
|
37
|
+
save
|
38
|
+
true
|
39
|
+
rescue StandardError => error
|
40
|
+
handle_error(error)
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
def prepare_categories
|
45
|
+
begin
|
46
|
+
@wrong_category = prepare_category(@wrong_category)
|
47
|
+
rescue StandardError => error
|
48
|
+
case error
|
49
|
+
when NoCategory
|
50
|
+
raise NoWrongCategory.new
|
51
|
+
when InvalidCategory
|
52
|
+
raise InvalidWrongCategory.new(@wrong_category, @categories.all)
|
53
|
+
else
|
54
|
+
raise error
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
begin
|
59
|
+
@correct_category = prepare_category(@correct_category)
|
60
|
+
rescue StandardError => error
|
61
|
+
case error
|
62
|
+
when NoCategory
|
63
|
+
raise NoCorrectCategory.new
|
64
|
+
when InvalidCategory
|
65
|
+
raise InvalidCorrectCategory.new(@correct_category, @categories.all)
|
66
|
+
else
|
67
|
+
raise error
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
log("training as: #{@wrong_category} => #{@correct_category}")
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -13,33 +13,29 @@
|
|
13
13
|
# You should have received a copy of the GNU General Public License
|
14
14
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
15
|
|
16
|
-
require "tiny-classifier/
|
16
|
+
require "tiny-classifier/command/base"
|
17
17
|
|
18
18
|
module TinyClassifier
|
19
|
-
|
20
|
-
class
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
*categories =
|
25
|
-
|
26
|
-
correct: categories[1])
|
19
|
+
module Command
|
20
|
+
class Train < Base
|
21
|
+
def initialize(argv=[])
|
22
|
+
super
|
23
|
+
option_parser.banner += " CATEGORY"
|
24
|
+
*categories = parse_command_line_options(argv)
|
25
|
+
@category = categories.first
|
27
26
|
end
|
28
|
-
end
|
29
27
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
classifier.send("untrain_#{@category}", input)
|
38
|
-
@category = params[:correct]
|
39
|
-
prepare_category
|
40
|
-
classifier.send("train_#{@category}", input)
|
28
|
+
def run
|
29
|
+
super
|
30
|
+
@category = prepare_category(@category)
|
31
|
+
log("training as: #{@category}")
|
32
|
+
raise NoEffectiveInput.new if input.empty?
|
33
|
+
|
34
|
+
classifier.train(@category, input)
|
41
35
|
save
|
42
36
|
true
|
37
|
+
rescue StandardError => error
|
38
|
+
handle_error(error)
|
43
39
|
end
|
44
40
|
end
|
45
41
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "tiny-classifier/command/base"
|
17
|
+
|
18
|
+
module TinyClassifier
|
19
|
+
module Command
|
20
|
+
class Untrain < Base
|
21
|
+
def initialize(argv=[])
|
22
|
+
super
|
23
|
+
option_parser.banner += " CATEGORY"
|
24
|
+
*categories = parse_command_line_options(argv)
|
25
|
+
@category = categories.first
|
26
|
+
end
|
27
|
+
|
28
|
+
def run
|
29
|
+
super
|
30
|
+
@category = prepare_category(@category)
|
31
|
+
log("untraining as: #{@category}")
|
32
|
+
raise NoEffectiveInput.new if input.empty?
|
33
|
+
raise NoTrainingData.new(data_file_path) unless data_file_path.exist?
|
34
|
+
|
35
|
+
classifier.untrain(@category, input)
|
36
|
+
save
|
37
|
+
true
|
38
|
+
rescue StandardError => error
|
39
|
+
handle_error(error)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
module TinyClassifier
|
17
|
+
class TinyClassifierError < StandardError
|
18
|
+
end
|
19
|
+
|
20
|
+
class NoInput < TinyClassifierError
|
21
|
+
def message
|
22
|
+
"No input. You need to give any input via the STDIN."
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class NoEffectiveInput < TinyClassifierError
|
27
|
+
def message
|
28
|
+
"No effective input."
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class NoCategories < TinyClassifierError
|
33
|
+
def message
|
34
|
+
"You need to specify categories."
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class NoCategory < TinyClassifierError
|
39
|
+
def message
|
40
|
+
"You need to specify a category for the input."
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class NoWrongCategory < NoCategory
|
45
|
+
def message
|
46
|
+
"You need to specify a category to untrain the input."
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
class NoCorrectCategory < NoCategory
|
51
|
+
def message
|
52
|
+
"You need to specify a category to retrain the input."
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class InvalidCategory < TinyClassifierError
|
57
|
+
attr_reader :category, :categories
|
58
|
+
|
59
|
+
def initialize(category, categories)
|
60
|
+
@category = category
|
61
|
+
@categories = categories
|
62
|
+
end
|
63
|
+
|
64
|
+
def message
|
65
|
+
"You need to specify one of valid categories: #{@categories.join(", ")}"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
class InvalidWrongCategory < InvalidCategory
|
70
|
+
def message
|
71
|
+
"You need to specify one of valid categories to untrain: #{@categories.join(", ")}"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
class InvalidCorrectCategory < InvalidCategory
|
76
|
+
def message
|
77
|
+
"You need to specify one of valid categories to retrain: #{@categories.join(", ")}"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
class NoTrainingData < TinyClassifierError
|
82
|
+
attr_reader :data_dir
|
83
|
+
|
84
|
+
def initialize(data_dir)
|
85
|
+
@data_dir = data_dir
|
86
|
+
end
|
87
|
+
|
88
|
+
def message
|
89
|
+
"There is no training data at #{@data_dir}."
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
class InvalidOutputDir < TinyClassifierError
|
94
|
+
attr_reader :output_dir
|
95
|
+
|
96
|
+
def initialize(output_dir)
|
97
|
+
@output_dir = output_dir
|
98
|
+
end
|
99
|
+
|
100
|
+
def message
|
101
|
+
"#{@output_dir} is not available as the output directory."
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -13,21 +13,19 @@
|
|
13
13
|
# You should have received a copy of the GNU General Public License
|
14
14
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
15
|
|
16
|
-
require "tiny-classifier/trainer"
|
17
|
-
|
18
16
|
module TinyClassifier
|
19
|
-
class
|
20
|
-
def
|
21
|
-
@
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
17
|
+
class Input
|
18
|
+
def initialize(data = nil)
|
19
|
+
@data = data
|
20
|
+
end
|
21
|
+
|
22
|
+
def given?
|
23
|
+
return true if @data or $stdin.is_a?(StringIO)
|
24
|
+
File.pipe?(STDIN)
|
25
|
+
end
|
26
|
+
|
27
|
+
def read
|
28
|
+
@data ||= $stdin.readlines.join(" ").strip
|
31
29
|
end
|
32
30
|
end
|
33
31
|
end
|
File without changes
|
data/tiny-classifier.gemspec
CHANGED
@@ -21,7 +21,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
|
|
21
21
|
|
22
22
|
Gem::Specification.new do |spec|
|
23
23
|
spec.name = "tiny-classifier"
|
24
|
-
spec.version = "2.
|
24
|
+
spec.version = "2.2"
|
25
25
|
spec.homepage = "https://github.com/piroor/tiny-classifier"
|
26
26
|
spec.authors = ["YUKI \"Piro\" Hiroshi"]
|
27
27
|
spec.email = ["piro.outsider.reflex@gmail.com"]
|
@@ -39,4 +39,8 @@ Gem::Specification.new do |spec|
|
|
39
39
|
|
40
40
|
spec.add_runtime_dependency("classifier-reborn")
|
41
41
|
spec.add_runtime_dependency("natto")
|
42
|
+
|
43
|
+
spec.add_development_dependency("bundler")
|
44
|
+
spec.add_development_dependency("rake")
|
45
|
+
spec.add_development_dependency("test-unit")
|
42
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '2.
|
4
|
+
version: '2.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- YUKI "Piro" Hiroshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: classifier-reborn
|
@@ -38,15 +38,57 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: test-unit
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
41
83
|
description: ''
|
42
84
|
email:
|
43
85
|
- piro.outsider.reflex@gmail.com
|
44
86
|
executables:
|
45
|
-
- tc-classify
|
46
87
|
- tc-generate-classifier
|
88
|
+
- tc-untrain
|
47
89
|
- tc-retrain
|
48
90
|
- tc-train
|
49
|
-
- tc-
|
91
|
+
- tc-classify
|
50
92
|
extensions: []
|
51
93
|
extra_rdoc_files: []
|
52
94
|
files:
|
@@ -58,13 +100,16 @@ files:
|
|
58
100
|
- bin/tc-retrain
|
59
101
|
- bin/tc-train
|
60
102
|
- bin/tc-untrain
|
61
|
-
- lib/tiny-classifier/
|
62
|
-
- lib/tiny-classifier/
|
63
|
-
- lib/tiny-classifier/
|
64
|
-
- lib/tiny-classifier/
|
103
|
+
- lib/tiny-classifier/category-manager.rb
|
104
|
+
- lib/tiny-classifier/command/base.rb
|
105
|
+
- lib/tiny-classifier/command/classify.rb
|
106
|
+
- lib/tiny-classifier/command/generate-classifier.rb
|
107
|
+
- lib/tiny-classifier/command/retrain.rb
|
108
|
+
- lib/tiny-classifier/command/train.rb
|
109
|
+
- lib/tiny-classifier/command/untrain.rb
|
110
|
+
- lib/tiny-classifier/errors.rb
|
111
|
+
- lib/tiny-classifier/input.rb
|
65
112
|
- lib/tiny-classifier/tokenizer.rb
|
66
|
-
- lib/tiny-classifier/trainer.rb
|
67
|
-
- lib/tiny-classifier/untrainer.rb
|
68
113
|
- tiny-classifier.gemspec
|
69
114
|
homepage: https://github.com/piroor/tiny-classifier
|
70
115
|
licenses:
|
data/lib/tiny-classifier/base.rb
DELETED
@@ -1,136 +0,0 @@
|
|
1
|
-
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
-
#
|
3
|
-
# This program is free software: you can redistribute it and/or modify
|
4
|
-
# it under the terms of the GNU General Public License as published by
|
5
|
-
# the Free Software Foundation, either version 3 of the License, or
|
6
|
-
# (at your option) any later version.
|
7
|
-
#
|
8
|
-
# This program is distributed in the hope that it will be useful,
|
9
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
-
# GNU General Public License for more details.
|
12
|
-
#
|
13
|
-
# You should have received a copy of the GNU General Public License
|
14
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
-
|
16
|
-
require "pathname"
|
17
|
-
require "optparse"
|
18
|
-
require "classifier-reborn"
|
19
|
-
require "tiny-classifier/tokenizer"
|
20
|
-
|
21
|
-
module TinyClassifier
|
22
|
-
class Base
|
23
|
-
attr_reader :tokenizer
|
24
|
-
|
25
|
-
def initialize
|
26
|
-
@tokenizer = Tokenizer.new
|
27
|
-
@data_dir = Dir.pwd
|
28
|
-
@verbose = false
|
29
|
-
end
|
30
|
-
|
31
|
-
def parse_command_line_options(command_line_options)
|
32
|
-
option_parser.parse!(command_line_options)
|
33
|
-
end
|
34
|
-
|
35
|
-
def classifier
|
36
|
-
@classifier ||= prepare_classifier
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
def option_parser
|
41
|
-
@option_parser ||= create_option_parser
|
42
|
-
end
|
43
|
-
|
44
|
-
def create_option_parser
|
45
|
-
parser = OptionParser.new
|
46
|
-
|
47
|
-
parser.on("-d PATH", "--data-dir=PATH",
|
48
|
-
"Path to the directory to store training data file (default=current directory)") do |data_dir|
|
49
|
-
@data_dir = data_dir
|
50
|
-
end
|
51
|
-
|
52
|
-
parser.on("-c CATEGORIES", "--categories=CATEGORIES",
|
53
|
-
"List of categories (comma-separated)") do |categories|
|
54
|
-
@categories = normalize_categories(categories)
|
55
|
-
log("categories: #{@categories}")
|
56
|
-
end
|
57
|
-
|
58
|
-
parser.on("-t TOKENIZER", "--tokenizer=TOKENIZER",
|
59
|
-
"Tokenizer (default=#{@tokenizer})") do |tokenizer|
|
60
|
-
@tokenizer.type = tokenizer
|
61
|
-
end
|
62
|
-
|
63
|
-
parser.on("-v", "--verbose",
|
64
|
-
"Output internal information (for debugging)") do |verbose|
|
65
|
-
@verbose = verbose
|
66
|
-
end
|
67
|
-
|
68
|
-
parser
|
69
|
-
end
|
70
|
-
|
71
|
-
def normalize_categories(categories)
|
72
|
-
categories
|
73
|
-
.strip
|
74
|
-
.downcase
|
75
|
-
.split(",")
|
76
|
-
.collect(&:strip)
|
77
|
-
.reject do |category|
|
78
|
-
category.empty?
|
79
|
-
end
|
80
|
-
.sort
|
81
|
-
.collect(&:capitalize)
|
82
|
-
end
|
83
|
-
|
84
|
-
def data_file_name
|
85
|
-
@data_file_basename ||= prepare_data_file_name
|
86
|
-
end
|
87
|
-
|
88
|
-
def prepare_data_file_name
|
89
|
-
categories = @categories.join("-").downcase
|
90
|
-
"tc.#{categories}.dat"
|
91
|
-
end
|
92
|
-
|
93
|
-
def data_file_path
|
94
|
-
@data_file_path ||= prepare_data_file_path
|
95
|
-
end
|
96
|
-
|
97
|
-
def prepare_data_file_path
|
98
|
-
path = Pathname(@data_dir)
|
99
|
-
path + data_file_name
|
100
|
-
end
|
101
|
-
|
102
|
-
# Loads the classifier stored in the data file, or creates a fresh Bayes
# classifier for the configured categories when no data file exists yet.
#
# The data file holds a Marshal dump, which is binary data, so it is read
# in binary mode; a text-mode read may alter the bytes on platforms that
# translate newlines or treat certain bytes as end-of-file.
#
# SECURITY: Marshal.load can instantiate arbitrary objects. Only point the
# data directory at files produced by this tool.
def prepare_classifier
  if data_file_path.exist?
    Marshal.load(data_file_path.binread)
  else
    ClassifierReborn::Bayes.new(*@categories)
  end
end
|
110
|
-
|
111
|
-
# Returns the prepared input text, reading it through prepare_input on
# first use and reusing the value cached in @input afterwards.
def input
  return @input if @input

  @input = prepare_input
end
|
114
|
-
|
115
|
-
# Reads the whole standard input, tokenizes it with @tokenizer, strips
# surrounding whitespace and returns the result (also stored in @input).
#
# When the standard input is an interactive terminal there is nothing to
# read, so an error is reported and the process exits with a failure
# status. Checking for a terminal (rather than requiring a pipe) also
# accepts input redirected from a regular file, e.g. `command < file`.
def prepare_input
  if $stdin.tty?
    error("Error: No effective input. You need to give any input via the STDIN.")
    exit(false)
  end

  # Lines keep their trailing newline and are additionally separated by a
  # single space before tokenizing.
  raw_text = $stdin.readlines.join(" ")
  @input = @tokenizer.tokenize(raw_text)
  log("tokenizer: #{@tokenizer.type}")
  @input.strip!
  log("input: #{@input}")
  @input
end
|
127
|
-
|
128
|
-
# Reports +message+ to the user by writing it to the standard error stream.
def error(message)
  STDERR.puts message
end
|
131
|
-
|
132
|
-
# Writes +message+ to the standard error stream, but only when verbose
# mode (@verbose) is enabled; otherwise does nothing.
def log(message)
  return unless @verbose

  STDERR.puts(message)
end
|
135
|
-
end
|
136
|
-
end
|
@@ -1,88 +0,0 @@
|
|
1
|
-
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
-
#
|
3
|
-
# This program is free software: you can redistribute it and/or modify
|
4
|
-
# it under the terms of the GNU General Public License as published by
|
5
|
-
# the Free Software Foundation, either version 3 of the License, or
|
6
|
-
# (at your option) any later version.
|
7
|
-
#
|
8
|
-
# This program is distributed in the hope that it will be useful,
|
9
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
-
# GNU General Public License for more details.
|
12
|
-
#
|
13
|
-
# You should have received a copy of the GNU General Public License
|
14
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
-
|
16
|
-
require "tiny-classifier/base"
|
17
|
-
require "tiny-classifier/classifier"
|
18
|
-
require "fileutils"
|
19
|
-
require "base64"
|
20
|
-
|
21
|
-
module TinyClassifier
  # Command that writes a standalone, executable classifier script.
  #
  # The generated script embeds the trained classifier as a Base64-encoded
  # Marshal dump, restores it at start-up and then runs
  # TinyClassifier::Classifier on its input.
  class ClassifierGenerator < Base
    class << self
      # Entry point: builds a generator, parses +argv+ (defaults to a copy
      # of ARGV) and writes the classifier script.
      def run(argv=nil)
        argv ||= ARGV.dup
        generator = new
        generator.parse_command_line_options(argv)
        generator.run
      end
    end

    # Adds the -o / --output-dir option on top of the common options; the
    # output directory defaults to the current working directory.
    def initialize
      super
      @output_dir = Dir.pwd
      option_parser.on("-o PATH", "--output-dir=PATH",
                       "Path to the classifier command to be saved (default=current directory)") do |output_dir|
        @output_dir = output_dir
      end
    end

    # Remembers an untouched copy of the arguments before the parent class
    # parses them destructively, so the embedded classifier can later be
    # configured from the very same arguments.
    def parse_command_line_options(command_line_options)
      @raw_command_line_options = command_line_options.dup if command_line_options
      super
    end

    # Writes the classifier script to output_file_path and marks it as
    # executable for everyone.
    def run
      # Encode first: if loading the classifier fails, no truncated script
      # is left behind.
      encoded = encoded_classifier
      # The directory must exist before the file can be opened for writing.
      FileUtils.mkdir_p(output_file_path.parent)

      script_lines = [
        "#!/usr/bin/env ruby",
        "require \"base64\"",
        "require \"classifier-reborn\"",
        "require \"tiny-classifier/classifier\"",
        "classifier_code = Base64.strict_decode64(\"#{encoded}\")",
        "classifier = TinyClassifier::Classifier.new",
        "classifier.classifier = Marshal.load(classifier_code)",
        "classifier.tokenizer.type = \"#{@tokenizer.type}\"",
        "classifier.run",
      ]
      File.open(output_file_path, "w") do |file|
        file.puts(script_lines)
      end
      FileUtils.chmod("a+x", output_file_path)
    end

    private
    # Base64 form of the marshalled classifier, computed once.
    def encoded_classifier
      @encoded_classifier ||= prepare_encoded_classifier
    end

    # Builds a Classifier configured from the same command line arguments
    # as this generator and returns its classifier object as a
    # Base64-encoded Marshal dump.
    def prepare_encoded_classifier
      classifier = Classifier.new
      # Fall back to the process arguments when no options were parsed
      # through this instance.
      # NOTE(review): the arguments may include -o/--output-dir; confirm
      # that Classifier's option parser accepts it.
      arguments = (@raw_command_line_options || ARGV).dup
      classifier.parse_command_line_options(arguments)

      classifier_code = Marshal.dump(classifier.classifier)
      Base64.strict_encode64(classifier_code)
    end

    # File name of the generated command, computed once.
    def classifier_name
      @classifier_name ||= prepare_classifier_name
    end

    # "tc-classify-" followed by the lower-cased category names joined
    # with "-".
    def prepare_classifier_name
      categories = @categories.join("-").downcase
      "tc-classify-#{categories}"
    end

    # Full path of the generated command, computed once.
    def output_file_path
      @output_file_path ||= prepare_output_file_path
    end

    # Resolves the command name against the output directory.
    def prepare_output_file_path
      path = Pathname(@output_dir)
      path + classifier_name
    end
  end
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
-
#
|
3
|
-
# This program is free software: you can redistribute it and/or modify
|
4
|
-
# it under the terms of the GNU General Public License as published by
|
5
|
-
# the Free Software Foundation, either version 3 of the License, or
|
6
|
-
# (at your option) any later version.
|
7
|
-
#
|
8
|
-
# This program is distributed in the hope that it will be useful,
|
9
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
-
# GNU General Public License for more details.
|
12
|
-
#
|
13
|
-
# You should have received a copy of the GNU General Public License
|
14
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
-
|
16
|
-
require "tiny-classifier/base"
|
17
|
-
|
18
|
-
module TinyClassifier
  # Command that trains the stored classifier with the text given on the
  # standard input, filed under one of the configured categories.
  class Trainer < Base
    class << self
      # Entry point: parses +argv+ (defaults to a copy of ARGV) and trains
      # with the first argument left over after option parsing as the
      # category.
      def run(argv=nil)
        argv ||= ARGV.dup
        trainer = new
        *categories = trainer.parse_command_line_options(argv)
        trainer.run(category: categories.first)
      end
    end

    # Extends the usage banner with the positional CATEGORY argument.
    def initialize
      super
      option_parser.banner += " CATEGORY"
    end

    # Trains the classifier with the input for params[:category] and saves
    # the result to the data file.
    #
    # Returns true on success and false when the input is empty. An
    # invalid category terminates the process (see prepare_category).
    def run(params)
      @category = params[:category]
      prepare_category
      if input.empty?
        error("Error: No effective input.")
        false
      else
        # The category has been validated against @categories by
        # prepare_category, so the dynamic method name is limited to
        # known categories.
        classifier.send("train_#{@category}", input)
        save
        true
      end
    end

    private
    # Normalizes @category (lower-case, stripped) and validates it.
    #
    # Reports an error and exits with a failure status when the category
    # is missing, blank, or not one of the configured categories.
    def prepare_category
      # nil becomes "" so missing and blank categories share one check.
      @category = @category.to_s.downcase.strip

      if @category.empty?
        error("Error: You need to specify the category for the input.")
        exit(false)
      end

      unless @categories.include?(@category.capitalize)
        error("Error: You need to specify one of valid categories: #{@categories.join(', ')}")
        exit(false)
      end
    end

    # Serializes the classifier with Marshal and writes it to the data
    # file. The dump is binary data, so the file is opened in binary mode
    # to keep the bytes intact on every platform.
    def save
      data = Marshal.dump(classifier)
      File.open(data_file_path, "wb") do |file|
        file.write(data)
      end
    end
  end
end
|