mecab-noun_parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/.rspec +2 -0
- data/.travis.yml +30 -0
- data/Gemfile +3 -0
- data/README.md +64 -0
- data/Rakefile +6 -0
- data/lib/mecab/noun_parser.rb +52 -0
- data/mecab-noun_parser.gemspec +20 -0
- data/spec/mecab/noun_parser_spec.rb +34 -0
- data/spec/spec_helper.rb +2 -0
- metadata +109 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f6cd2e447fe7145e53db72ae3b47115431680c74
|
4
|
+
data.tar.gz: c053a3dbce8b81d178141bf0c692d315714bbb22
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5f57471cd13cc41e2647f93d5f212bac6b428dd7445049c7c50ef9d6b1ecc3a4a92c452442e5e3948e03986d4d4292b7724e8b1db55612823987c719078621ef
|
7
|
+
data.tar.gz: d6d565b9c254067d945eff1da7955829d60afae2e712a4d86bfe950413aefccfacc969bcef3605503a49c3d72ab5252723537fc2683acc0a1d0b046fe592d183
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
before_install:
|
2
|
+
- sudo apt-get update
|
3
|
+
|
4
|
+
# mecab
|
5
|
+
- wget https://mecab.googlecode.com/files/mecab-0.996.tar.gz
|
6
|
+
- tar zxfv mecab-0.996.tar.gz
|
7
|
+
- cd mecab-0.996
|
8
|
+
- ./configure --enable-utf8-only
|
9
|
+
- make
|
10
|
+
- make check
|
11
|
+
- sudo make install
|
12
|
+
- sudo ldconfig
|
13
|
+
|
14
|
+
- cd ../
|
15
|
+
|
16
|
+
# mecab-ipadic
|
17
|
+
- wget https://mecab.googlecode.com/files/mecab-ipadic-2.7.0-20070801.tar.gz
|
18
|
+
- tar zxfv mecab-ipadic-2.7.0-20070801.tar.gz
|
19
|
+
- cd mecab-ipadic-2.7.0-20070801
|
20
|
+
- ./configure --with-charset=utf8
|
21
|
+
- make
|
22
|
+
- sudo make install
|
23
|
+
- sudo ldconfig
|
24
|
+
|
25
|
+
- cd ../
|
26
|
+
|
27
|
+
language: ruby
|
28
|
+
|
29
|
+
rvm:
|
30
|
+
- 2.0.0
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Mecab::NounParser
|
2
|
+
|
3
|
+
[Build Status](https://travis-ci.org/kami30k/mecab-noun_parser)
|
4
|
+
[Gem Version](http://badge.fury.io/rb/mecab-noun_parser)
|
5
|
+
|
6
|
+
This gem is a noun parser for the [MeCab](https://github.com/markburns/mecab) gem.
|
7
|
+
It counts how many times each noun occurs and returns the nouns sorted by count in descending order.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'mecab'
|
15
|
+
gem 'mecab-noun_parser'
|
16
|
+
```
|
17
|
+
|
18
|
+
And then execute:
|
19
|
+
|
20
|
+
$ bundle
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
Here's a simple usage example:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
require 'mecab'
|
28
|
+
require 'mecab/noun_parser'
|
29
|
+
|
30
|
+
parser = MeCab::NounParser.new
|
31
|
+
|
32
|
+
parser << 'Rubyは、手軽なオブジェクト指向プログラミングを実現するための種々の機能を持つオブジェクト指向スクリプト言語です。本格的なオブジェクト指向言語であるSmalltalk、EiffelやC++などでは大げさに思われるような領域でのオブジェクト指向プログラミングを支援することを目的としています。もちろん通常の手続き型のプログラミングも可能です。'
|
33
|
+
parser << 'Rubyはテキスト処理関係の能力などに優れ、Perlと同じくらい強力です。さらにシンプルな文法と、例外処理やイテレータなどの機構によって、より分かりやすいプログラミングが出来ます。'
|
34
|
+
parser << 'まあ、簡単にいえばPerlのような手軽さで「楽しく」オブジェクト指向しようという言語です。どうぞ使ってみてください。'
|
35
|
+
parser << 'Rubyはまつもと ゆきひろが個人で開発しているフリーソフトウェアです。'
|
36
|
+
|
37
|
+
p parser.parse.nouns
|
38
|
+
```
|
39
|
+
|
40
|
+
The result is as follows:
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
[
|
44
|
+
{:noun=>"Ruby", :count=>3},
|
45
|
+
{:noun=>"オブジェクト指向プログラミング", :count=>2},
|
46
|
+
{:noun=>"Perl", :count=>2},
|
47
|
+
:
|
48
|
+
{:noun=>"個人", :count=>1},
|
49
|
+
{:noun=>"開発", :count=>1},
|
50
|
+
{:noun=>"フリーソフトウェア", :count=>1}
|
51
|
+
]
|
52
|
+
```
|
53
|
+
|
54
|
+
### Excluding some words
|
55
|
+
|
56
|
+
If you want to exclude certain words, initialize the parser with the `:except` option:
|
57
|
+
|
58
|
+
```ruby
|
59
|
+
parser = MeCab::NounParser.new(except: ['Smalltalk', 'Eiffel', ...])
|
60
|
+
```
|
61
|
+
|
62
|
+
## Contributing
|
63
|
+
|
64
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/kami30k/mecab-noun_parser.
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
module MeCab
  # Counts the nouns found in queued text using a MeCab tagger.
  #
  # Text is queued with #<<, tokenized by #parse, and the tally is read back
  # with #nouns. Consecutive noun tokens are joined into one compound noun
  # before being counted.
  class NounParser
    # Leading part-of-speech field that marks a token as a noun.
    NOUN_FEATURE = '名詞'.freeze

    # @param options [Hash] keyword options
    # @option options [Array<String>, String] :except token surfaces to leave
    #   out. A listed token is skipped, but the noun tokens around it are still
    #   joined together. A single String is treated as a one-element list.
    def initialize(**options)
      # Array() keeps a bare String from being used for substring matching.
      @except = Array(options[:except])
      @nouns = Hash.new(0)
      @text = []
    end

    # Queues a piece of text for the next #parse call.
    #
    # @param text [String] text to analyze
    # @return [Array<String>] the internal queue, as in earlier versions
    def <<(text)
      @text << text
    end

    # Tokenizes every queued text and tallies the nouns.
    #
    # The tally is rebuilt from scratch on each call, so calling #parse more
    # than once does not inflate the counts.
    #
    # @return [NounParser] self, so that `parser.parse.nouns` can be chained
    def parse
      @nouns = Hash.new(0)
      @text.each { |text| count_nouns(text) }

      self
    end

    # @return [Array<Hash>] one `{ noun: String, count: Integer }` entry per
    #   noun, highest count first; ties keep the order of first appearance
    def nouns
      sorted_nouns.map { |noun, count| { noun: noun, count: count } }
    end

    private

    # Lazily built tagger shared by all #parse calls on this instance.
    def mecab
      @mecab ||= MeCab::Tagger.new
    end

    # Walks the node list for one text and records each run of noun tokens.
    def count_nouns(text)
      parts = []
      node = mecab.parseToNode(text)

      while node
        if noun?(node)
          parts << node.surface unless @except.include?(node.surface)
        else
          record(parts.join)
          parts = []
        end

        node = node.next
      end

      # Flush a noun run that reaches the end of the node list.
      record(parts.join)
    end

    # True when the first comma-separated feature field is the noun marker.
    def noun?(node)
      node.feature.split(',').first == NOUN_FEATURE
    end

    # Adds one occurrence of +noun+ unless the run was empty.
    def record(noun)
      @nouns[noun] += 1 unless noun.empty?
    end

    # Pairs of [noun, count] ordered by descending count. The insertion index
    # is the secondary key because sort_by alone is not stable.
    def sorted_nouns
      @nouns.each_with_index
            .sort_by { |(_, count), index| [-count, index] }
            .map(&:first)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Gem specification for mecab-noun_parser.

# Put the gem's lib directory at the front of the load path.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir)

Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'mecab-noun_parser'
  spec.version     = '0.1.0'
  spec.authors     = 'kami'
  spec.email       = 'kami30k@gmail.com'

  # Descriptive metadata
  spec.summary     = 'A noun parser for MeCab gem.'
  spec.description = 'A noun parser for MeCab gem.'
  spec.homepage    = 'https://github.com/kami30k/mecab-noun_parser'

  # Package every file tracked by git (NUL-separated listing).
  spec.files = `git ls-files -z`.split("\x0")

  # Runtime dependency
  spec.add_dependency 'mecab'

  # Development-only dependencies
  %w[bundler rake rspec].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Behavioural examples for MeCab::NounParser: ordering, counting and the
# :except option, all run against the same four sample sentences.
describe MeCab::NounParser do
  # Parsed result for whichever parser the enclosing context defines.
  let(:nouns) { parser.parse.nouns }

  before do
    parser << 'Rubyは、手軽なオブジェクト指向プログラミングを実現するための種々の機能を持つオブジェクト指向スクリプト言語です。本格的なオブジェクト指向言語であるSmalltalk、EiffelやC++などでは大げさに思われるような領域でのオブジェクト指向プログラミングを支援することを目的としています。もちろん通常の手続き型のプログラミングも可能です。'
    parser << 'Rubyはテキスト処理関係の能力などに優れ、Perlと同じくらい強力です。さらにシンプルな文法と、例外処理やイテレータなどの機構によって、より分かりやすいプログラミングが出来ます。'
    parser << 'まあ、簡単にいえばPerlのような手軽さで「楽しく」オブジェクト指向しようという言語です。どうぞ使ってみてください。'
    parser << 'Rubyはまつもと ゆきひろが個人で開発しているフリーソフトウェアです。'
  end

  context 'default usage' do
    let(:parser) { MeCab::NounParser.new }

    it 'should be sort' do
      expect(nouns.first[:noun]).to eq 'Ruby'
      expect(nouns.first[:count]).to eq 3
    end

    it 'should be count' do
      entry = nouns.find { |noun| noun[:noun] == 'オブジェクト指向プログラミング' }

      # Read the :count value. Calling #count on the entry would return the
      # number of keys in the Hash (always 2), hiding a wrong tally.
      expect(entry[:count]).to eq 2
    end
  end

  context 'except some words' do
    let(:parser) { MeCab::NounParser.new(except: ['Ruby', 'Smalltalk']) }

    it 'should not be include' do
      expect(nouns.first[:noun]).not_to eq 'Ruby'
      # The excluded word must be absent everywhere, not just from the top.
      expect(nouns.map { |noun| noun[:noun] }).not_to include 'Ruby'
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mecab-noun_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- kami
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-08-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mecab
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: A noun parser for MeCab gem.
|
70
|
+
email: kami30k@gmail.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- ".gitignore"
|
76
|
+
- ".rspec"
|
77
|
+
- ".travis.yml"
|
78
|
+
- Gemfile
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- lib/mecab/noun_parser.rb
|
82
|
+
- mecab-noun_parser.gemspec
|
83
|
+
- spec/mecab/noun_parser_spec.rb
|
84
|
+
- spec/spec_helper.rb
|
85
|
+
homepage: https://github.com/kami30k/mecab-noun_parser
|
86
|
+
licenses: []
|
87
|
+
metadata: {}
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 2.4.5
|
105
|
+
signing_key:
|
106
|
+
specification_version: 4
|
107
|
+
summary: A noun parser for MeCab gem.
|
108
|
+
test_files: []
|
109
|
+
has_rdoc:
|