tiny-classifier 1.5 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -12
- data/bin/tc-retrain +20 -0
- data/lib/tiny-classifier/base.rb +10 -10
- data/lib/tiny-classifier/classifier-generator.rb +2 -2
- data/lib/tiny-classifier/classifier.rb +2 -2
- data/lib/tiny-classifier/retrainer.rb +46 -0
- data/lib/tiny-classifier/trainer.rb +14 -14
- data/lib/tiny-classifier/untrainer.rb +3 -3
- data/tiny-classifier.gemspec +1 -1
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ff0e4cd3aafc37e4ba99b782d3b7816c6bbb5f9f
|
4
|
+
data.tar.gz: d9b99162fe1711f3f7cdaaf3791f1df9d75df14f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 933a1773de70ee773863a643556ec4dfedff6433ee3385b4ce8f3283efa1959e25027fae1712f410c39d70f72b74a06b231299d5041a12276afe8c771f948047
|
7
|
+
data.tar.gz: 93602fc9af2bda18c52beaefd980b912aa438b95526c549d965fde192c3f3061fbb1ebffaf13de221ec92ae57e7cf25b06adb3c0c83c90f16520aa3208037cab
|
data/README.md
CHANGED
@@ -27,34 +27,34 @@ This is example on Ubuntu.
|
|
27
27
|
Training:
|
28
28
|
|
29
29
|
```
|
30
|
-
% echo "Hello, world!" | tc-train --
|
31
|
-
% echo "I'm very very happy!" | tc-train --
|
32
|
-
% echo "I'm so bad..." | tc-train --
|
33
|
-
% echo "Oh my god!" | tc-train --
|
30
|
+
% echo "Hello, world!" | tc-train --categories=positive,negative positive
|
31
|
+
% echo "I'm very very happy!" | tc-train --categories=positive,negative positive
|
32
|
+
% echo "I'm so bad..." | tc-train --categories=positive,negative negative
|
33
|
+
% echo "Oh my god!" | tc-train --categories=positive,negative negative
|
34
34
|
```
|
35
35
|
|
36
|
-
The training data will be saved as `tc.negative-positive.dat` (`tc.` is the fixed prefix, `.dat` is the fixed suffix. The middle part is filled by given
|
36
|
+
The training data will be saved as `tc.negative-positive.dat` (`tc.` is the fixed prefix and `.dat` is the fixed suffix; the middle part is filled in automatically from the given categories) in the current directory. If you want the file to be saved somewhere else, specify `--data-dir=/path/to/data/directory`.
|
37
37
|
|
38
38
|
Untraining for mistakes:
|
39
39
|
|
40
40
|
```
|
41
|
-
% echo "I'm so bad..." | tc-untrain --
|
41
|
+
% echo "I'm so bad..." | tc-untrain --categories=positive,negative positive
|
42
42
|
```
|
43
43
|
|
44
44
|
Testing to classify:
|
45
45
|
|
46
46
|
~~~
|
47
|
-
% echo "Happy day?" | tc-classify --
|
47
|
+
% echo "Happy day?" | tc-classify --categories=positive,negative
|
48
48
|
positive
|
49
49
|
~~~
|
50
50
|
|
51
51
|
Once you think the classifier has been trained enough, you can generate a fixed classifier:
|
52
52
|
|
53
53
|
~~~
|
54
|
-
% tc-generate-classifier --
|
54
|
+
% tc-generate-classifier --categories=positive,negative --output-dir=/path/to/dir
|
55
55
|
~~~
|
56
56
|
|
57
|
-
Then a fixed classifier (executable Ruby script) will be generated as `tc-classify-negative-positive` (`tc-classify-` is the fixed prefix, rest is filled by given
|
57
|
+
Then a fixed classifier (an executable Ruby script) will be generated as `tc-classify-negative-positive` (`tc-classify-` is the fixed prefix; the rest is filled in automatically from the given categories).
|
58
58
|
|
59
59
|
~~~
|
60
60
|
% ls /path/to/dir/
|
@@ -67,8 +67,8 @@ positive
|
|
67
67
|
|
68
68
|
### Common
|
69
69
|
|
70
|
-
`-l`, `--
|
71
|
-
: A comman-separated list of
|
70
|
+
`-c`, `--categories=CATEGORIES` (required)
|
71
|
+
: A comma-separated list of categories. You should use only alphabetic characters. (Non-alphabetic characters will cause problems.)
|
72
72
|
|
73
73
|
`-d`, `--data-dir=PATH` (optional)
|
74
74
|
: The path to the directory where the training data is saved. The current directory is the default value.
|
@@ -78,7 +78,7 @@ positive
|
|
78
78
|
|
79
79
|
### `tc-train` and `tc-untrain` specific parameters
|
80
80
|
|
81
|
-
Both `tc-train` and `tc-untrain` require one command line argument: the
|
81
|
+
Both `tc-train` and `tc-untrain` require one command line argument: the category. It must be one of the categories given via the `--categories` parameter.
|
82
82
|
|
83
83
|
### `tc-generate-classifier` specific parameters
|
84
84
|
|
data/bin/tc-retrain
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
require "tiny-classifier/retrainer"
|
19
|
+
|
20
|
+
TinyClassifier::Retrainer.run
|
data/lib/tiny-classifier/base.rb
CHANGED
@@ -48,9 +48,9 @@ module TinyClassifier
|
|
48
48
|
@data_dir = data_dir
|
49
49
|
end
|
50
50
|
|
51
|
-
parser.on("-
|
52
|
-
"List of
|
53
|
-
@
|
51
|
+
parser.on("-c CATEGORIES", "--categories=CATEGORIES",
|
52
|
+
"List of categories (comma-separated)") do |categories|
|
53
|
+
@categories = normalize_categories(categories)
|
54
54
|
end
|
55
55
|
|
56
56
|
parser.on("-t TOKENIZER", "--tokenizer=TOKENIZER",
|
@@ -61,14 +61,14 @@ module TinyClassifier
|
|
61
61
|
parser
|
62
62
|
end
|
63
63
|
|
64
|
-
def
|
65
|
-
|
64
|
+
def normalize_categories(categories)
|
65
|
+
categories
|
66
66
|
.strip
|
67
67
|
.downcase
|
68
68
|
.split(",")
|
69
69
|
.collect(&:strip)
|
70
|
-
.reject do |
|
71
|
-
|
70
|
+
.reject do |category|
|
71
|
+
category.empty?
|
72
72
|
end
|
73
73
|
.sort
|
74
74
|
.collect(&:capitalize)
|
@@ -79,8 +79,8 @@ module TinyClassifier
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def prepare_data_file_name
|
82
|
-
|
83
|
-
"tc.#{
|
82
|
+
categories = @categories.join("-").downcase
|
83
|
+
"tc.#{categories}.dat"
|
84
84
|
end
|
85
85
|
|
86
86
|
def data_file_path
|
@@ -97,7 +97,7 @@ module TinyClassifier
|
|
97
97
|
data = File.read(data_file_path.to_s)
|
98
98
|
Marshal.load(data)
|
99
99
|
else
|
100
|
-
ClassifierReborn::Bayes.new(*@
|
100
|
+
ClassifierReborn::Bayes.new(*@categories)
|
101
101
|
end
|
102
102
|
end
|
103
103
|
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Copyright (C) 2017 YUKI "Piro" Hiroshi
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "tiny-classifier/trainer"
|
17
|
+
|
18
|
+
module TinyClassifier
|
19
|
+
class Retrainer < Trainer
|
20
|
+
class << self
|
21
|
+
def run(argv=nil)
|
22
|
+
argv ||= ARGV.dup
|
23
|
+
retrainer = new
|
24
|
+
*categories = retrainer.parse_command_line_options(argv)
|
25
|
+
retrainer.run(wrong: categories[0],
|
26
|
+
correct: categories[1])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def run(params)
|
31
|
+
if input.empty?
|
32
|
+
STDERR.puts("Error: No effective input.")
|
33
|
+
false
|
34
|
+
else
|
35
|
+
@category = params[:wrong]
|
36
|
+
prepare_category
|
37
|
+
classifier.send("untrain_#{@category}", input)
|
38
|
+
@category = params[:correct]
|
39
|
+
prepare_category
|
40
|
+
classifier.send("train_#{@category}", input)
|
41
|
+
save
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -21,45 +21,45 @@ module TinyClassifier
|
|
21
21
|
def run(argv=nil)
|
22
22
|
argv ||= ARGV.dup
|
23
23
|
trainer = new
|
24
|
-
*
|
25
|
-
trainer.run(
|
24
|
+
*categories = trainer.parse_command_line_options(argv)
|
25
|
+
trainer.run(category: categories.first)
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
29
|
def initialize
|
30
30
|
super
|
31
|
-
option_parser.banner += "
|
31
|
+
option_parser.banner += " CATEGORY"
|
32
32
|
end
|
33
33
|
|
34
34
|
def run(params)
|
35
|
-
@
|
36
|
-
|
35
|
+
@category = params[:category]
|
36
|
+
prepare_category
|
37
37
|
if input.empty?
|
38
38
|
STDERR.puts("Error: No effective input.")
|
39
39
|
false
|
40
40
|
else
|
41
|
-
classifier.send("train_#{@
|
41
|
+
classifier.send("train_#{@category}", input)
|
42
42
|
save
|
43
43
|
true
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
47
|
private
|
48
|
-
def
|
49
|
-
unless @
|
50
|
-
STDERR.puts("Error: You need to specify the
|
48
|
+
def prepare_category
|
49
|
+
unless @category
|
50
|
+
STDERR.puts("Error: You need to specify the category for the input.")
|
51
51
|
exit(false)
|
52
52
|
end
|
53
53
|
|
54
|
-
@
|
54
|
+
@category = @category.downcase.strip
|
55
55
|
|
56
|
-
if @
|
57
|
-
STDERR.puts("Error: You need to specify the
|
56
|
+
if @category.empty?
|
57
|
+
STDERR.puts("Error: You need to specify the category for the input.")
|
58
58
|
exit(false)
|
59
59
|
end
|
60
60
|
|
61
|
-
unless @
|
62
|
-
STDERR.puts("Error: You need to specify one of valid
|
61
|
+
unless @categories.include?(@category.capitalize)
|
62
|
+
STDERR.puts("Error: You need to specify one of valid categories: #{@categories.join(', ')}")
|
63
63
|
exit(false)
|
64
64
|
end
|
65
65
|
end
|
@@ -18,13 +18,13 @@ require "tiny-classifier/trainer"
|
|
18
18
|
module TinyClassifier
|
19
19
|
class Untrainer < Trainer
|
20
20
|
def run(params)
|
21
|
-
@
|
22
|
-
|
21
|
+
@category = params[:category]
|
22
|
+
prepare_category
|
23
23
|
if input.empty?
|
24
24
|
STDERR.puts("Error: No effective input.")
|
25
25
|
false
|
26
26
|
else
|
27
|
-
classifier.send("untrain_#{@
|
27
|
+
classifier.send("untrain_#{@category}", input)
|
28
28
|
save
|
29
29
|
true
|
30
30
|
end
|
data/tiny-classifier.gemspec
CHANGED
@@ -21,7 +21,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
|
|
21
21
|
|
22
22
|
Gem::Specification.new do |spec|
|
23
23
|
spec.name = "tiny-classifier"
|
24
|
-
spec.version = "
|
24
|
+
spec.version = "2.0"
|
25
25
|
spec.homepage = "https://github.com/piroor/tiny-classifier"
|
26
26
|
spec.authors = ["YUKI \"Piro\" Hiroshi"]
|
27
27
|
spec.email = ["piro.outsider.reflex@gmail.com"]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '
|
4
|
+
version: '2.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- YUKI "Piro" Hiroshi
|
@@ -44,6 +44,7 @@ email:
|
|
44
44
|
executables:
|
45
45
|
- tc-classify
|
46
46
|
- tc-generate-classifier
|
47
|
+
- tc-retrain
|
47
48
|
- tc-train
|
48
49
|
- tc-untrain
|
49
50
|
extensions: []
|
@@ -54,11 +55,13 @@ files:
|
|
54
55
|
- Rakefile
|
55
56
|
- bin/tc-classify
|
56
57
|
- bin/tc-generate-classifier
|
58
|
+
- bin/tc-retrain
|
57
59
|
- bin/tc-train
|
58
60
|
- bin/tc-untrain
|
59
61
|
- lib/tiny-classifier/base.rb
|
60
62
|
- lib/tiny-classifier/classifier-generator.rb
|
61
63
|
- lib/tiny-classifier/classifier.rb
|
64
|
+
- lib/tiny-classifier/retrainer.rb
|
62
65
|
- lib/tiny-classifier/tokenizer.rb
|
63
66
|
- lib/tiny-classifier/trainer.rb
|
64
67
|
- lib/tiny-classifier/untrainer.rb
|