natto2classifier 0.2.0 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.circleci/config.yml +1 -10
- data/.travis.yml +18 -2
- data/Gemfile.lock +6 -6
- data/README.md +15 -2
- data/lib/natto2classifier.rb +1 -0
- data/lib/natto2classifier/bayes.rb +17 -0
- data/lib/natto2classifier/lsi.rb +22 -0
- data/lib/natto2classifier/natto.rb +6 -3
- data/lib/natto2classifier/validator.rb +6 -0
- data/lib/natto2classifier/version.rb +1 -1
- data/natto2classifier.gemspec +1 -1
- metadata +11 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cc64abd451078f00dbc116e3285248f1948acabf670642a4fe66e4b90f371cfb
|
4
|
+
data.tar.gz: 7c0e6ad7d763513d9a443a4fc1c65332464f2821ac986f2c81a5834360750970
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1b2ecb24ef6bd19690e41e59232f353309862ef7007ffa8e4cfbabfceb883001a3dcdd50cadd015dc038542566b99fd117caddc7d6778afff2c37849d052dc0
|
7
|
+
data.tar.gz: 23f9c9af1ea91106c83741d003dd7c56b4b608bab789bf518e185f28f8bd2ffefbfecf981b7aeb8d5417f59c2fade90237ca505cb1330784eb82e30786d0244d
|
data/.circleci/config.yml
CHANGED
@@ -5,6 +5,7 @@ jobs:
|
|
5
5
|
- image: kanayannet/natto2classifier:latest
|
6
6
|
steps:
|
7
7
|
- checkout
|
8
|
+
- run: echo 'run test'
|
8
9
|
- run: bundle install
|
9
10
|
- run: bundle exec ruby test/natto2classifier_test.rb
|
10
11
|
workflows:
|
@@ -12,13 +13,3 @@ workflows:
|
|
12
13
|
test:
|
13
14
|
jobs:
|
14
15
|
- test
|
15
|
-
scheduled-workflow:
|
16
|
-
triggers:
|
17
|
-
- schedule:
|
18
|
-
cron: "0 * * * *"
|
19
|
-
filters:
|
20
|
-
branches:
|
21
|
-
only:
|
22
|
-
- master
|
23
|
-
jobs:
|
24
|
-
- test
|
data/.travis.yml
CHANGED
@@ -1,5 +1,21 @@
|
|
1
|
-
sudo: false
|
2
1
|
language: ruby
|
3
2
|
rvm:
|
4
3
|
- 2.4.3
|
5
|
-
before_install:
|
4
|
+
before_install:
|
5
|
+
- gem install bundler -v 1.16.1
|
6
|
+
- sudo apt-get update -qq
|
7
|
+
install:
|
8
|
+
# mecab
|
9
|
+
- wget --no-check-certificate https://github.com/buruzaemon/natto/raw/master/etc/mecab-0.996.tar.gz && tar zxf mecab-0.996.tar.gz
|
10
|
+
- pushd mecab-0.996 && ./configure --enable-utf8-only && make && sudo make install && popd
|
11
|
+
- sudo ldconfig
|
12
|
+
# mecab-ipadic
|
13
|
+
- wget --no-check-certificate https://github.com/buruzaemon/natto/raw/master/etc/mecab-ipadic-2.7.0-20070801.tar.gz && tar zxf mecab-ipadic-2.7.0-20070801.tar.gz
|
14
|
+
- pushd mecab-ipadic-2.7.0-20070801 && ./configure --with-charset=utf8 && make && sudo make install && popd
|
15
|
+
- sudo ldconfig
|
16
|
+
# gsl
|
17
|
+
- sudo apt-get install libgsl2 libgsl-dev
|
18
|
+
# explicitly install
|
19
|
+
- bundle install --path .bundle
|
20
|
+
script:
|
21
|
+
- bundle exec ruby test/natto2classifier_test.rb
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
natto2classifier (0.2.0)
|
4
|
+
natto2classifier (0.3.5)
|
5
5
|
classifier-reborn
|
6
6
|
natto
|
7
7
|
rb-gsl
|
@@ -13,16 +13,16 @@ GEM
|
|
13
13
|
fast-stemmer (~> 1.0)
|
14
14
|
coderay (1.1.2)
|
15
15
|
fast-stemmer (1.0.2)
|
16
|
-
ffi (1.
|
16
|
+
ffi (1.13.1)
|
17
17
|
gsl (2.1.0.3)
|
18
18
|
method_source (0.9.0)
|
19
19
|
minitest (5.11.3)
|
20
|
-
natto (1.
|
20
|
+
natto (1.2.0)
|
21
21
|
ffi (>= 1.9.0)
|
22
22
|
pry (0.11.3)
|
23
23
|
coderay (~> 1.1.0)
|
24
24
|
method_source (~> 0.9.0)
|
25
|
-
rake (
|
25
|
+
rake (13.0.1)
|
26
26
|
rb-gsl (1.16.0.6)
|
27
27
|
gsl
|
28
28
|
|
@@ -34,7 +34,7 @@ DEPENDENCIES
|
|
34
34
|
minitest (~> 5.0)
|
35
35
|
natto2classifier!
|
36
36
|
pry
|
37
|
-
rake (~>
|
37
|
+
rake (~> 13.0)
|
38
38
|
|
39
39
|
BUNDLED WITH
|
40
|
-
1.
|
40
|
+
1.17.2
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Natto2classifier
|
2
2
|
|
3
|
+
[![Build Status](https://travis-ci.org/kanayannet/natto2classifier.svg?branch=master)](https://travis-ci.org/kanayannet/natto2classifier)
|
4
|
+
|
3
5
|
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/natto2classifier`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
6
|
|
5
7
|
## Installation
|
@@ -39,6 +41,17 @@ lsi.classify '納豆はいつも朝食べている' #=> '朝食'
|
|
39
41
|
lsi.find_related '納豆はいつも朝食べている' #=> ['今日 キョウ の ノ 朝食 チョウショク は ハ 納豆 ナットウ だ ダ', '今日 キョウ の ノ 夕食 ユウショク は ハ 湯豆腐 ユドウフ だ ダ']
|
40
42
|
```
|
41
43
|
|
44
|
+
### validate methods
|
45
|
+
|
46
|
+
```
|
47
|
+
sample_data = CSV.read('./data/train.csv')
|
48
|
+
bayes = Natto2classifier::Bayes.new '朝食', '夕食'
|
49
|
+
cross_validate(bayes, sample_data) #=> report...
|
50
|
+
|
51
|
+
test_data, training_data = sample_data.partition.with_index { |_, i| (i % 2).zero? }
|
52
|
+
validate(bayes, training_data, test_data) #=> {"夕食"=>{"夕食"=>3, "朝食"=>0}, "朝食"=>{"夕食"=>...}}
|
53
|
+
```
|
54
|
+
|
42
55
|
## Development
|
43
56
|
|
44
57
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -47,7 +60,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
47
60
|
|
48
61
|
## Contributing
|
49
62
|
|
50
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
63
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/kanayannet/natto2classifier. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
51
64
|
|
52
65
|
## License
|
53
66
|
|
@@ -55,4 +68,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
55
68
|
|
56
69
|
## Code of Conduct
|
57
70
|
|
58
|
-
Everyone interacting in the Natto2classifier project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
71
|
+
Everyone interacting in the Natto2classifier project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/kanayannet/natto2classifier/blob/master/CODE_OF_CONDUCT.md).
|
data/lib/natto2classifier.rb
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
module Natto2classifier
|
3
|
+
# It is a library that classifies Japanese language.
|
4
|
+
class Bayes < ClassifierReborn::Bayes
|
5
|
+
alias_method :__train__, :train
|
6
|
+
alias_method :__classify__, :classify
|
7
|
+
private :__train__, :__classify__
|
8
|
+
|
9
|
+
def train(category, word)
|
10
|
+
__train__ category, Natto2classifier::Natto.parse(word).join(' ')
|
11
|
+
end
|
12
|
+
|
13
|
+
def classify(word)
|
14
|
+
__classify__ Natto2classifier::Natto.parse(word).join(' ')
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
module Natto2classifier
|
3
|
+
# It is a library that classifies Japanese language.
|
4
|
+
class LSI < ClassifierReborn::LSI
|
5
|
+
alias_method :__add_item__, :add_item
|
6
|
+
alias_method :__classify__, :classify
|
7
|
+
alias_method :__find_related__, :find_related
|
8
|
+
private :__add_item__, :__classify__, :__find_related__
|
9
|
+
|
10
|
+
def add_item(word, category)
|
11
|
+
__add_item__ Natto2classifier::Natto.parse(word).join(' '), category
|
12
|
+
end
|
13
|
+
|
14
|
+
def classify(word)
|
15
|
+
__classify__ Natto2classifier::Natto.parse(word).join(' ')
|
16
|
+
end
|
17
|
+
|
18
|
+
def find_related(word)
|
19
|
+
__find_related__ Natto2classifier::Natto.parse(word).join(' ')
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -4,10 +4,13 @@ require 'natto'
|
|
4
4
|
module Natto2classifier
|
5
5
|
class Natto
|
6
6
|
def self.parse(word)
|
7
|
-
nm = ::Natto::MeCab.new
|
7
|
+
nm = ::Natto::MeCab.new
|
8
8
|
results = []
|
9
|
-
nm.
|
10
|
-
|
9
|
+
nm.parse(word.to_s) do |n|
|
10
|
+
break if n.is_eos?
|
11
|
+
kana = n.feature.split(',')[7]
|
12
|
+
results << n.surface
|
13
|
+
results << kana if !kana.nil? && kana != '*'
|
11
14
|
end
|
12
15
|
results
|
13
16
|
end
|
data/natto2classifier.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
24
|
spec.add_development_dependency "bundler", "~> 1.16"
|
25
|
-
spec.add_development_dependency "rake", "~>
|
25
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
26
26
|
spec.add_development_dependency "minitest", "~> 5.0"
|
27
27
|
spec.add_development_dependency "pry"
|
28
28
|
spec.add_runtime_dependency "natto"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto2classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kanayannet
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '13.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '13.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -129,14 +129,17 @@ files:
|
|
129
129
|
- bin/setup
|
130
130
|
- data/train.csv
|
131
131
|
- lib/natto2classifier.rb
|
132
|
+
- lib/natto2classifier/bayes.rb
|
133
|
+
- lib/natto2classifier/lsi.rb
|
132
134
|
- lib/natto2classifier/natto.rb
|
135
|
+
- lib/natto2classifier/validator.rb
|
133
136
|
- lib/natto2classifier/version.rb
|
134
137
|
- natto2classifier.gemspec
|
135
138
|
homepage: https://github.com/kanayannet/natto2classifier
|
136
139
|
licenses:
|
137
140
|
- MIT
|
138
141
|
metadata: {}
|
139
|
-
post_install_message:
|
142
|
+
post_install_message:
|
140
143
|
rdoc_options: []
|
141
144
|
require_paths:
|
142
145
|
- lib
|
@@ -151,9 +154,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
151
154
|
- !ruby/object:Gem::Version
|
152
155
|
version: '0'
|
153
156
|
requirements: []
|
154
|
-
|
155
|
-
|
156
|
-
signing_key:
|
157
|
+
rubygems_version: 3.0.3
|
158
|
+
signing_key:
|
157
159
|
specification_version: 4
|
158
160
|
summary: It is a library that classifies Japanese language.
|
159
161
|
test_files: []
|