mecab-noun_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f6cd2e447fe7145e53db72ae3b47115431680c74
4
+ data.tar.gz: c053a3dbce8b81d178141bf0c692d315714bbb22
5
+ SHA512:
6
+ metadata.gz: 5f57471cd13cc41e2647f93d5f212bac6b428dd7445049c7c50ef9d6b1ecc3a4a92c452442e5e3948e03986d4d4292b7724e8b1db55612823987c719078621ef
7
+ data.tar.gz: d6d565b9c254067d945eff1da7955829d60afae2e712a4d86bfe950413aefccfacc969bcef3605503a49c3d72ab5252723537fc2683acc0a1d0b046fe592d183
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ /.bundle/
2
+ /Gemfile.lock
3
+ /pkg/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,30 @@
1
+ before_install:
2
+ - sudo apt-get update
3
+
4
+ # mecab
5
+ - wget https://mecab.googlecode.com/files/mecab-0.996.tar.gz
6
+ - tar zxfv mecab-0.996.tar.gz
7
+ - cd mecab-0.996
8
+ - ./configure --enable-utf8-only
9
+ - make
10
+ - make check
11
+ - sudo make install
12
+ - sudo ldconfig
13
+
14
+ - cd ../
15
+
16
+ # mecab-ipadic
17
+ - wget https://mecab.googlecode.com/files/mecab-ipadic-2.7.0-20070801.tar.gz
18
+ - tar zxfv mecab-ipadic-2.7.0-20070801.tar.gz
19
+ - cd mecab-ipadic-2.7.0-20070801
20
+ - ./configure --with-charset=utf8
21
+ - make
22
+ - sudo make install
23
+ - sudo ldconfig
24
+
25
+ - cd ../
26
+
27
+ language: ruby
28
+
29
+ rvm:
30
+ - 2.0.0
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/README.md ADDED
@@ -0,0 +1,64 @@
1
+ # MeCab::NounParser
2
+
3
+ [![Build Status](https://travis-ci.org/kami30k/mecab-noun_parser.svg)](https://travis-ci.org/kami30k/mecab-noun_parser)
4
+ [![Gem Version](https://badge.fury.io/rb/mecab-noun_parser.svg)](http://badge.fury.io/rb/mecab-noun_parser)
5
+
6
+ This gem is a noun parser for the [MeCab](https://github.com/markburns/mecab) gem.
7
+ It counts how often each noun appears in the given text and returns the nouns sorted by count in descending order.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ ```ruby
14
+ gem 'mecab'
15
+ gem 'mecab-noun_parser'
16
+ ```
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ ## Usage
23
+
24
+ Here's a simple usage example:
25
+
26
+ ```ruby
27
+ require 'mecab'
28
+ require 'mecab/noun_parser'
29
+
30
+ parser = MeCab::NounParser.new
31
+
32
+ parser << 'Rubyは、手軽なオブジェクト指向プログラミングを実現するための種々の機能を持つオブジェクト指向スクリプト言語です。本格的なオブジェクト指向言語であるSmalltalk、EiffelやC++などでは大げさに思われるような領域でのオブジェクト指向プログラミングを支援することを目的としています。もちろん通常の手続き型のプログラミングも可能です。'
33
+ parser << 'Rubyはテキスト処理関係の能力などに優れ、Perlと同じくらい強力です。さらにシンプルな文法と、例外処理やイテレータなどの機構によって、より分かりやすいプログラミングが出来ます。'
34
+ parser << 'まあ、簡単にいえばPerlのような手軽さで「楽しく」オブジェクト指向しようという言語です。どうぞ使ってみてください。'
35
+ parser << 'Rubyはまつもと ゆきひろが個人で開発しているフリーソフトウェアです。'
36
+
37
+ p parser.parse.nouns
38
+ ```
39
+
40
+ The result is as follows:
41
+
42
+ ```ruby
43
+ [
44
+ {:noun=>"Ruby", :count=>3},
45
+ {:noun=>"オブジェクト指向プログラミング", :count=>2},
46
+ {:noun=>"Perl", :count=>2},
47
+ :
48
+ {:noun=>"個人", :count=>1},
49
+ {:noun=>"開発", :count=>1},
50
+ {:noun=>"フリーソフトウェア", :count=>1}
51
+ ]
52
+ ```
53
+
54
+ ### Excluding some words
55
+
56
+ If you don't want to include some words, initialize the parser with the `:except` option:
57
+
58
+ ```ruby
59
+ parser = MeCab::NounParser.new(except: ['Smalltalk', 'Eiffel', ...])
60
+ ```
61
+
62
+ ## Contributing
63
+
64
+ Bug reports and pull requests are welcome on GitHub at https://github.com/kami30k/mecab-noun_parser.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task default: :spec
@@ -0,0 +1,52 @@
module MeCab
  # Counts the nouns found in queued text using a MeCab tagger.
  #
  # Text is queued with #<<, tokenized by #parse, and the resulting tallies
  # are read back with #nouns. Consecutive noun morphemes are concatenated
  # into a single compound noun before being counted.
  #
  # MeCab::Tagger is looked up lazily on the first #parse call; this file does
  # not require it itself, so the caller must have loaded it beforehand.
  class NounParser
    # First comma-separated field of a node's feature string that marks a noun.
    NOUN_FEATURE = '名詞'.freeze

    # @param options [Hash] keyword options
    # @option options [Array<String>] :except morpheme surfaces to leave out
    #   of the counted nouns (defaults to an empty list)
    def initialize(**options)
      @except = options[:except] || []
      @nouns = Hash.new(0)
      @text = []
    end

    # Queues a piece of text for the next #parse call.
    #
    # @param text [String] text to analyse
    # @return [NounParser] self, so calls can be chained
    def <<(text)
      @text << text
      self
    end

    # Tokenizes every queued text and tallies the nouns.
    #
    # The tallies are rebuilt from scratch on every call, so calling #parse
    # repeatedly (or again after queuing more text) never counts a text twice.
    #
    # @return [NounParser] self, so that `parse.nouns` can be chained
    def parse
      @nouns = Hash.new(0)
      @text.each { |text| count_nouns(text) }
      self
    end

    # @return [Array<Hash>] one `{ noun:, count: }` hash per noun, ordered by
    #   descending count; nouns with equal counts keep first-appearance order
    def nouns
      sorted_nouns.map { |noun, count| { noun: noun, count: count } }
    end

    private

    # Lazily built tagger shared by all #parse calls on this instance.
    def mecab
      @mecab ||= MeCab::Tagger.new
    end

    # Walks the node list for one text and tallies each run of noun nodes.
    def count_nouns(text)
      parts = []
      node = mecab.parseToNode(text)

      while node
        if noun?(node)
          # An excluded surface is skipped, but it does not end the current run.
          parts << node.surface unless @except.include?(node.surface)
        else
          record(parts)
        end

        node = node.next
      end

      # Flush a run that reaches the end of the list without a closing
      # non-noun node.
      record(parts)
    end

    # True when the first field of the node's feature string marks a noun.
    def noun?(node)
      node.feature.split(',')[0] == NOUN_FEATURE
    end

    # Counts the compound built from the collected surfaces, then resets the run.
    def record(parts)
      noun = parts.join
      parts.clear
      @nouns[noun] += 1 unless noun.empty?
    end

    # [noun, count] pairs by descending count. The insertion index is the
    # tie-breaker because sort_by alone does not guarantee a stable order.
    def sorted_nouns
      @nouns.each_with_index
            .sort_by { |(_, count), index| [-count, index] }
            .map(&:first)
    end
  end
end
@@ -0,0 +1,20 @@
# Gem specification for mecab-noun_parser.
$:.unshift File.expand_path('../lib', __FILE__)

Gem::Specification.new do |s|
  s.name        = 'mecab-noun_parser'
  s.version     = '0.1.0'
  s.authors     = ['kami']
  s.email       = 'kami30k@gmail.com'

  s.summary     = 'A noun parser for MeCab gem.'
  # Kept distinct from the summary; RubyGems warns when the two are identical.
  s.description = 'Counts the nouns that MeCab finds in Japanese text and ' \
                  'returns them sorted by frequency in descending order.'
  s.homepage    = 'https://github.com/kami30k/mecab-noun_parser'

  # The library uses keyword splat arguments (**options), which need Ruby 2.0.
  s.required_ruby_version = '>= 2.0.0'

  # Package every file tracked by git (NUL-separated to survive odd file names).
  s.files = `git ls-files -z`.split("\x0")

  s.add_dependency 'mecab'

  s.add_development_dependency 'bundler'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'rspec'
end
@@ -0,0 +1,34 @@
require 'spec_helper'

# Behavioural examples for MeCab::NounParser, run against a fixed set of
# Japanese sentences describing Ruby.
describe MeCab::NounParser do
  # Parsed result for whichever parser the enclosing context defines.
  let(:nouns) { parser.parse.nouns }

  before do
    parser << 'Rubyは、手軽なオブジェクト指向プログラミングを実現するための種々の機能を持つオブジェクト指向スクリプト言語です。本格的なオブジェクト指向言語であるSmalltalk、EiffelやC++などでは大げさに思われるような領域でのオブジェクト指向プログラミングを支援することを目的としています。もちろん通常の手続き型のプログラミングも可能です。'
    parser << 'Rubyはテキスト処理関係の能力などに優れ、Perlと同じくらい強力です。さらにシンプルな文法と、例外処理やイテレータなどの機構によって、より分かりやすいプログラミングが出来ます。'
    parser << 'まあ、簡単にいえばPerlのような手軽さで「楽しく」オブジェクト指向しようという言語です。どうぞ使ってみてください。'
    parser << 'Rubyはまつもと ゆきひろが個人で開発しているフリーソフトウェアです。'
  end

  context 'default usage' do
    let(:parser) { MeCab::NounParser.new }

    it 'should be sort' do
      expect(nouns.first[:noun]).to eq 'Ruby'
      expect(nouns.first[:count]).to eq 3
    end

    it 'should be count' do
      entry = nouns.find { |noun| noun[:noun] == 'オブジェクト指向プログラミング' }
      # Read the tallied value. Hash#count would return the number of
      # key/value pairs in the entry (always 2), not the noun's count.
      expect(entry[:count]).to eq 2
    end
  end

  context 'except some words' do
    let(:parser) { MeCab::NounParser.new(except: ['Ruby', 'Smalltalk']) }

    it 'should not be include' do
      expect(nouns.first[:noun]).not_to eq 'Ruby'
      # Excluded words must not show up anywhere in the result, not just first.
      expect(nouns.map { |noun| noun[:noun] }).not_to include('Ruby', 'Smalltalk')
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'mecab'
2
+ require 'mecab/noun_parser'
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mecab-noun_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - kami
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-08-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mecab
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: A noun parser for MeCab gem.
70
+ email: kami30k@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - ".gitignore"
76
+ - ".rspec"
77
+ - ".travis.yml"
78
+ - Gemfile
79
+ - README.md
80
+ - Rakefile
81
+ - lib/mecab/noun_parser.rb
82
+ - mecab-noun_parser.gemspec
83
+ - spec/mecab/noun_parser_spec.rb
84
+ - spec/spec_helper.rb
85
+ homepage: https://github.com/kami30k/mecab-noun_parser
86
+ licenses: []
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 2.4.5
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: A noun parser for MeCab gem.
108
+ test_files: []
109
+ has_rdoc: