tiny-classifier 1.5 → 2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -12
- data/bin/tc-retrain +20 -0
- data/lib/tiny-classifier/base.rb +10 -10
- data/lib/tiny-classifier/classifier-generator.rb +2 -2
- data/lib/tiny-classifier/classifier.rb +2 -2
- data/lib/tiny-classifier/retrainer.rb +46 -0
- data/lib/tiny-classifier/trainer.rb +14 -14
- data/lib/tiny-classifier/untrainer.rb +3 -3
- data/tiny-classifier.gemspec +1 -1
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ff0e4cd3aafc37e4ba99b782d3b7816c6bbb5f9f
|
4
|
+
data.tar.gz: d9b99162fe1711f3f7cdaaf3791f1df9d75df14f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 933a1773de70ee773863a643556ec4dfedff6433ee3385b4ce8f3283efa1959e25027fae1712f410c39d70f72b74a06b231299d5041a12276afe8c771f948047
|
7
|
+
data.tar.gz: 93602fc9af2bda18c52beaefd980b912aa438b95526c549d965fde192c3f3061fbb1ebffaf13de221ec92ae57e7cf25b06adb3c0c83c90f16520aa3208037cab
|
data/README.md
CHANGED
@@ -27,34 +27,34 @@ This is example on Ubuntu.
|
|
27
27
|
Training:
|
28
28
|
|
29
29
|
```
|
30
|
-
% echo "Hello, world!" | tc-train --
|
31
|
-
% echo "I'm very very happy!" | tc-train --
|
32
|
-
% echo "I'm so bad..." | tc-train --
|
33
|
-
% echo "Oh my god!" | tc-train --
|
30
|
+
% echo "Hello, world!" | tc-train --categories=positive,negative positive
|
31
|
+
% echo "I'm very very happy!" | tc-train --categories=positive,negative positive
|
32
|
+
% echo "I'm so bad..." | tc-train --categories=positive,negative negative
|
33
|
+
% echo "Oh my god!" | tc-train --categories=positive,negative negative
|
34
34
|
```
|
35
35
|
|
36
|
-
The training data will be saved as `tc.negative-positive.dat` (`tc.` is the fixed prefix, `.dat` is the fixed suffix. The middle part is filled by given
|
36
|
+
The training data will be saved as `tc.negative-positive.dat` (`tc.` is the fixed prefix and `.dat` is the fixed suffix; the middle part is filled in automatically from the given categories) in the current directory. If you want the file to be saved somewhere else, specify `--data-dir=/path/to/data/directory`.
|
37
37
|
|
38
38
|
Untraining for mistakes:
|
39
39
|
|
40
40
|
```
|
41
|
-
% echo "I'm so bad..." | tc-untrain --
|
41
|
+
% echo "I'm so bad..." | tc-untrain --categories=positive,negative positive
|
42
42
|
```
|
43
43
|
|
44
44
|
Testing to classify:
|
45
45
|
|
46
46
|
~~~
|
47
|
-
% echo "Happy day?" | tc-classify --
|
47
|
+
% echo "Happy day?" | tc-classify --categories=positive,negative
|
48
48
|
positive
|
49
49
|
~~~
|
50
50
|
|
51
51
|
If you think that the classifier has been trained sufficiently, you can generate a fixed classifier:
|
52
52
|
|
53
53
|
~~~
|
54
|
-
% tc-generate-classifier --
|
54
|
+
% tc-generate-classifier --categories=positive,negative --output-dir=/path/to/dir
|
55
55
|
~~~
|
56
56
|
|
57
|
-
Then a fixed classifier (executable Ruby script) will be generated as `tc-classify-negative-positive` (`tc-classify-` is the fixed prefix, rest is filled by given
|
57
|
+
Then a fixed classifier (an executable Ruby script) will be generated as `tc-classify-negative-positive` (`tc-classify-` is the fixed prefix; the rest is filled in automatically from the given categories).
|
58
58
|
|
59
59
|
~~~
|
60
60
|
% ls /path/to/dir/
|
@@ -67,8 +67,8 @@ positive
|
|
67
67
|
|
68
68
|
### Common
|
69
69
|
|
70
|
-
`-l`, `--
|
71
|
-
: A comman-separated list of
|
70
|
+
`-c`, `--categories=CATEGORIES` (required)
|
71
|
+
: A comma-separated list of categories. Use only alphabetic characters; non-alphabetic characters will cause problems.
|
72
72
|
|
73
73
|
`-d`, `--data-dir=PATH` (optional)
|
74
74
|
: The path to the directory where the training data is saved. The current directory is the default value.
|
@@ -78,7 +78,7 @@ positive
|
|
78
78
|
|
79
79
|
### `tc-train` and `tc-untrain` specific parameters
|
80
80
|
|
81
|
-
Both `tc-train` and `tc-untrain` require one command line argument: the
|
81
|
+
Both `tc-train` and `tc-untrain` require one command line argument: the category. You need to specify one of the categories given via the `--categories` parameter.
|
82
82
|
|
83
83
|
### `tc-generate-classifier` specific parameters
|
84
84
|
|
data/bin/tc-retrain
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
require "tiny-classifier/retrainer"
|
19
|
+
|
20
|
+
TinyClassifier::Retrainer.run
|
data/lib/tiny-classifier/base.rb
CHANGED
@@ -48,9 +48,9 @@ module TinyClassifier
|
|
48
48
|
@data_dir = data_dir
|
49
49
|
end
|
50
50
|
|
51
|
-
parser.on("-
|
52
|
-
"List of
|
53
|
-
@
|
51
|
+
parser.on("-c CATEGORIES", "--categories=CATEGORIES",
|
52
|
+
"List of categories (comma-separated)") do |categories|
|
53
|
+
@categories = normalize_categories(categories)
|
54
54
|
end
|
55
55
|
|
56
56
|
parser.on("-t TOKENIZER", "--tokenizer=TOKENIZER",
|
@@ -61,14 +61,14 @@ module TinyClassifier
|
|
61
61
|
parser
|
62
62
|
end
|
63
63
|
|
64
|
-
def
|
65
|
-
|
64
|
+
def normalize_categories(categories)
|
65
|
+
categories
|
66
66
|
.strip
|
67
67
|
.downcase
|
68
68
|
.split(",")
|
69
69
|
.collect(&:strip)
|
70
|
-
.reject do |
|
71
|
-
|
70
|
+
.reject do |category|
|
71
|
+
category.empty?
|
72
72
|
end
|
73
73
|
.sort
|
74
74
|
.collect(&:capitalize)
|
@@ -79,8 +79,8 @@ module TinyClassifier
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def prepare_data_file_name
|
82
|
-
|
83
|
-
"tc.#{
|
82
|
+
categories = @categories.join("-").downcase
|
83
|
+
"tc.#{categories}.dat"
|
84
84
|
end
|
85
85
|
|
86
86
|
def data_file_path
|
@@ -97,7 +97,7 @@ module TinyClassifier
|
|
97
97
|
data = File.read(data_file_path.to_s)
|
98
98
|
Marshal.load(data)
|
99
99
|
else
|
100
|
-
ClassifierReborn::Bayes.new(*@
|
100
|
+
ClassifierReborn::Bayes.new(*@categories)
|
101
101
|
end
|
102
102
|
end
|
103
103
|
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "tiny-classifier/trainer"
|
17
|
+
|
18
|
+
module TinyClassifier
|
19
|
+
class Retrainer < Trainer
|
20
|
+
class << self
|
21
|
+
def run(argv=nil)
|
22
|
+
argv ||= ARGV.dup
|
23
|
+
retrainer = new
|
24
|
+
*categories = retrainer.parse_command_line_options(argv)
|
25
|
+
retrainer.run(wrong: categories[0],
|
26
|
+
correct: categories[1])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def run(params)
|
31
|
+
if input.empty?
|
32
|
+
STDERR.puts("Error: No effective input.")
|
33
|
+
false
|
34
|
+
else
|
35
|
+
@category = params[:wrong]
|
36
|
+
prepare_category
|
37
|
+
classifier.send("untrain_#{@category}", input)
|
38
|
+
@category = params[:correct]
|
39
|
+
prepare_category
|
40
|
+
classifier.send("train_#{@category}", input)
|
41
|
+
save
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -21,45 +21,45 @@ module TinyClassifier
|
|
21
21
|
def run(argv=nil)
|
22
22
|
argv ||= ARGV.dup
|
23
23
|
trainer = new
|
24
|
-
*
|
25
|
-
trainer.run(
|
24
|
+
*categories = trainer.parse_command_line_options(argv)
|
25
|
+
trainer.run(category: categories.first)
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
29
|
def initialize
|
30
30
|
super
|
31
|
-
option_parser.banner += "
|
31
|
+
option_parser.banner += " CATEGORY"
|
32
32
|
end
|
33
33
|
|
34
34
|
def run(params)
|
35
|
-
@
|
36
|
-
|
35
|
+
@category = params[:category]
|
36
|
+
prepare_category
|
37
37
|
if input.empty?
|
38
38
|
STDERR.puts("Error: No effective input.")
|
39
39
|
false
|
40
40
|
else
|
41
|
-
classifier.send("train_#{@
|
41
|
+
classifier.send("train_#{@category}", input)
|
42
42
|
save
|
43
43
|
true
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
47
|
private
|
48
|
-
def
|
49
|
-
unless @
|
50
|
-
STDERR.puts("Error: You need to specify the
|
48
|
+
def prepare_category
|
49
|
+
unless @category
|
50
|
+
STDERR.puts("Error: You need to specify the category for the input.")
|
51
51
|
exit(false)
|
52
52
|
end
|
53
53
|
|
54
|
-
@
|
54
|
+
@category = @category.downcase.strip
|
55
55
|
|
56
|
-
if @
|
57
|
-
STDERR.puts("Error: You need to specify the
|
56
|
+
if @category.empty?
|
57
|
+
STDERR.puts("Error: You need to specify the category for the input.")
|
58
58
|
exit(false)
|
59
59
|
end
|
60
60
|
|
61
|
-
unless @
|
62
|
-
STDERR.puts("Error: You need to specify one of valid
|
61
|
+
unless @categories.include?(@category.capitalize)
|
62
|
+
STDERR.puts("Error: You need to specify one of valid categories: #{@categories.join(', ')}")
|
63
63
|
exit(false)
|
64
64
|
end
|
65
65
|
end
|
@@ -18,13 +18,13 @@ require "tiny-classifier/trainer"
|
|
18
18
|
module TinyClassifier
|
19
19
|
class Untrainer < Trainer
|
20
20
|
def run(params)
|
21
|
-
@
|
22
|
-
|
21
|
+
@category = params[:category]
|
22
|
+
prepare_category
|
23
23
|
if input.empty?
|
24
24
|
STDERR.puts("Error: No effective input.")
|
25
25
|
false
|
26
26
|
else
|
27
|
-
classifier.send("untrain_#{@
|
27
|
+
classifier.send("untrain_#{@category}", input)
|
28
28
|
save
|
29
29
|
true
|
30
30
|
end
|
data/tiny-classifier.gemspec
CHANGED
@@ -21,7 +21,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
|
|
21
21
|
|
22
22
|
Gem::Specification.new do |spec|
|
23
23
|
spec.name = "tiny-classifier"
|
24
|
-
spec.version = "
|
24
|
+
spec.version = "2.0"
|
25
25
|
spec.homepage = "https://github.com/piroor/tiny-classifier"
|
26
26
|
spec.authors = ["YUKI \"Piro\" Hiroshi"]
|
27
27
|
spec.email = ["piro.outsider.reflex@gmail.com"]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '
|
4
|
+
version: '2.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- YUKI "Piro" Hiroshi
|
@@ -44,6 +44,7 @@ email:
|
|
44
44
|
executables:
|
45
45
|
- tc-classify
|
46
46
|
- tc-generate-classifier
|
47
|
+
- tc-retrain
|
47
48
|
- tc-train
|
48
49
|
- tc-untrain
|
49
50
|
extensions: []
|
@@ -54,11 +55,13 @@ files:
|
|
54
55
|
- Rakefile
|
55
56
|
- bin/tc-classify
|
56
57
|
- bin/tc-generate-classifier
|
58
|
+
- bin/tc-retrain
|
57
59
|
- bin/tc-train
|
58
60
|
- bin/tc-untrain
|
59
61
|
- lib/tiny-classifier/base.rb
|
60
62
|
- lib/tiny-classifier/classifier-generator.rb
|
61
63
|
- lib/tiny-classifier/classifier.rb
|
64
|
+
- lib/tiny-classifier/retrainer.rb
|
62
65
|
- lib/tiny-classifier/tokenizer.rb
|
63
66
|
- lib/tiny-classifier/trainer.rb
|
64
67
|
- lib/tiny-classifier/untrainer.rb
|