tiny-classifier 1.0 → 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef03bed349d3c266bab512446b6dff8946d5c14f
4
- data.tar.gz: 768126f03759f1fe38ef2e155dad3484836f4422
3
+ metadata.gz: 962697672b93b80fa4ca5efcf46c9e820d184b96
4
+ data.tar.gz: 199917cf6cae91ae3fed243818bf924426b929ca
5
5
  SHA512:
6
- metadata.gz: 9c59f06bef1b3fcc30914a410c29ac52444ea77790f5f4f588515c268bb181a2f353083bfe1ab3ceeba94964052807a6cb556ded863712b09f6488c7d1feff07
7
- data.tar.gz: aa188deccf010883b6d04f3ae63ba0124d9ff4f586690cfccf921be5e1466deed11e72b021083142d0e37134f80a1b462af4e7989af0d3c69c830a19391000c9
6
+ metadata.gz: 08b97dcd3859de27e4b39796fecfb57e5a978e55c8d47dcda19e700acee7cd8a7451e3e60f7e74c750530b17ee038222853babd278b63450b1f71cd82c3d5b12
7
+ data.tar.gz: 5c7be101d3209ff3079e597e069e502be00769aa11bb025825047bcc651bbbda5b1037ac361be4a4531dcea1a4cc32c9f82e922bd461ca4ab438036abbe75446
data/README.md CHANGED
@@ -14,6 +14,14 @@ Command line tool to run text classifier based on naive bayes.
14
14
  % gem install tiny-classifier
15
15
  ```
16
16
 
17
+ If you want to use `--tokenizer=mecab`, you need to install MeCab, like this:
18
+
19
+ ```
20
+ % sudo apt install mecab mecab-ipadic-utf8
21
+ ```
22
+
23
+ This is an example for Ubuntu.
24
+
17
25
  ## Basic usage
18
26
 
19
27
  Training:
@@ -36,13 +44,13 @@ positive
36
44
 
37
45
  ### Common
38
46
 
39
- `--labels=LABELS` (required)
47
+ `-l`, `--labels=LABELS` (required)
40
48
  : A comma-separated list of labels. You should use only alphabetic characters. (Non-alphabetical characters will cause problems.)
41
49
 
42
- `--data-dir=PATH` (optional)
43
- : The path to the directory that the training data to be saved. The current directory.
50
+ `-d`, `--data-dir=PATH` (optional)
51
+ : The path to the directory where the training data is saved. The current directory is the default value.
44
52
 
45
- `--tokenizer=TOKENIZER` (optional)
53
+ `-t`, `--tokenizer=TOKENIZER` (optional)
46
54
  : Tokenizer for input that is not separated by whitespace. The only possible value is `mecab`.
47
55
 
48
56
  ### Trainer
@@ -26,27 +26,30 @@ class TinyClassifierBase
26
26
  end
27
27
 
28
28
  def parse_command_line_options(command_line_options)
29
- option_parser = create_option_parser
30
29
  option_parser.parse!(command_line_options)
31
30
  end
32
31
 
33
32
  private
33
+ def option_parser
34
+ @option_parser ||= create_option_parser
35
+ end
36
+
34
37
  def create_option_parser
35
38
  parser = OptionParser.new
36
39
 
37
- parser.on("--data-dir=PATH",
40
+ parser.on("-d PATH", "--data-dir=PATH",
38
41
  "Path to the directory to store training data file (default=current directory)") do |data_dir|
39
42
  @data_dir = data_dir
40
43
  end
41
44
 
42
- parser.on("--labels=LABELS",
45
+ parser.on("-l LABELS", "--labels=LABELS",
43
46
  "List of labels (comma-separated)") do |labels|
44
47
  @labels = normalize_labels(labels)
45
48
  end
46
49
 
47
- parser.on("--tokenizer=TOKENIZER",
50
+ parser.on("-t TOKENIZER", "--tokenizer=TOKENIZER",
48
51
  "Tokenizer (default=#{@tokenizer})") do |tokenizer|
49
- @tokenizer = tokenizer.to_sym
52
+ @tokenizer = tokenizer.downcase.to_sym
50
53
  end
51
54
 
52
55
  parser
@@ -113,7 +116,7 @@ class TinyClassifierBase
113
116
  natto = Natto::MeCab.new
114
117
  terms = []
115
118
  natto.parse(@input) do |term|
116
- if term.feature =~ /名詞|形容詞|動詞/
119
+ if term.feature =~ /\A(名詞|形容詞|動詞)/
117
120
  terms << term.surface
118
121
  end
119
122
  end
@@ -31,11 +31,12 @@ class Classifier < TinyClassifierBase
31
31
  @input = params[:input]
32
32
  prepare_input
33
33
  if @input.empty?
34
- exit(1)
34
+ STDERR.puts("Error: No effective input.")
35
+ false
35
36
  else
36
37
  label = classifier.classify(@input)
37
38
  puts label.downcase
38
- exit(0)
39
+ true
39
40
  end
40
41
  end
41
42
  end
@@ -27,20 +27,46 @@ class Trainer < TinyClassifierBase
27
27
  end
28
28
  end
29
29
 
30
+ def initialize
31
+ super
32
+ option_parser.banner += " LABEL"
33
+ end
34
+
30
35
  def run(params)
31
36
  @label = params[:label]
32
37
  @input = params[:input]
38
+ prepare_label
33
39
  prepare_input
34
40
  if @input.empty?
35
- exit(1)
41
+ STDERR.puts("Error: No effective input.")
42
+ false
36
43
  else
37
- classifier.send("train_#{@label.downcase}", @input)
44
+ classifier.send("train_#{@label}", @input)
38
45
  save
39
- exit(0)
46
+ true
40
47
  end
41
48
  end
42
49
 
43
50
  private
51
+ def prepare_label
52
+ unless @label
53
+ STDERR.puts("Error: You need to specify the label for the input.")
54
+ exit(false)
55
+ end
56
+
57
+ @label = @label.downcase.strip
58
+
59
+ if @label.empty?
60
+ STDERR.puts("Error: You need to specify the label for the input.")
61
+ exit(false)
62
+ end
63
+
64
+ unless @labels.include?(@label.capitalize)
65
+ STDERR.puts("Error: You need to specify one of valid labels: #{@labels.join(', ')}")
66
+ exit(false)
67
+ end
68
+ end
69
+
44
70
  def save
45
71
  data = Marshal.dump(classifier)
46
72
  File.open(data_file_path, "w") do |file|
@@ -21,7 +21,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
21
21
 
22
22
  Gem::Specification.new do |spec|
23
23
  spec.name = "tiny-classifier"
24
- spec.version = "1.0"
24
+ spec.version = "1.1"
25
25
  spec.homepage = "https://github.com/piroor/tiny-classifier"
26
26
  spec.authors = ["YUKI \"Piro\" Hiroshi"]
27
27
  spec.email = ["piro.outsider.reflex@gmail.com"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiny-classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.0'
4
+ version: '1.1'
5
5
  platform: ruby
6
6
  authors:
7
7
  - YUKI "Piro" Hiroshi