juman_knp 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 23f6aff424c726c227a527483945be9f3c9d434e
4
+ data.tar.gz: 645b90b022af79539701713554838c2c67feb2e5
5
+ SHA512:
6
+ metadata.gz: d4bfe2c9d60e20c22c6d9c823ce35d18630f84245aa2d4b55306e3b79e0b37ef8b84470f18fa61debb078928aa38177152da8634830581b467d4e879f15baece
7
+ data.tar.gz: 9fac5baebd623a8487160694a83ad3847824fb48ea81ce03b8dd5b68ee2cc652cbdb3308f5d3dd626612a5a4e270907d5aae14dc6c9cea697241217b252a17ab
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in juman_knp.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 FURUSAWA Tomohiro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,31 @@
1
+ # JumanKnp
2
+
3
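+ A Ruby wrapper around the JUMAN (morphological analysis) and KNP (parsing) command-line tools for Japanese natural language processing. The `juman` and `knp` commands must be installed and available on your PATH.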
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'juman_knp'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install juman_knp
20
+
21
+ ## Usage
22
+
23
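+ ```ruby
+ require 'juman_knp'
+
+ # Morphological analysis: each entry of ma_arr is one JUMAN output line split on whitespace.
+ juman = Juman.new("猫が走る")
+ juman.ma_arr       # all parsed output lines
+ juman.array_of(0)  # first column of every line
+
+ # Parsing: bunsetsu maps each group's morpheme lines to its header fields.
+ knp = Knp.new("猫が走る")
+ knp.bunsetsu
+ ```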
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/juman_knp/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
7
+
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'juman_knp'
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'juman_knp/version'
5
+
6
# Gem specification for juman_knp.
Gem::Specification.new do |spec|
  spec.name          = "juman_knp"
  spec.version       = JumanKnp::VERSION
  spec.authors       = ["rilmayer"]
  spec.email         = ["git@frsw.net"]
  spec.summary       = %q{wrapper of JUMAN and KNP.}
  spec.description   = %q{You can use JUMAN and KNP for natural language processing, by Ruby.}
  spec.homepage      = "http://frsw.net/"
  spec.license       = "MIT"

  # Package every file tracked by git, except editor backup files ("*~"),
  # which are not part of the library and should not be shipped.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.end_with?("~") }
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
end
@@ -0,0 +1,170 @@
1
+ require "juman_knp/version"
2
+
3
+ # coding: utf-8
4
# Runs the external `juman -b` command on a string and keeps the parsed output.
#
# Each element of #ma_arr is one output line split on whitespace (the "EOS"
# terminator line is dropped).
class Juman
  require 'open3'

  Juman_Versin = "7.0"
  # Correctly spelled alias; the misspelled constant is kept for existing callers.
  Juman_Version = Juman_Versin

  Hinshi = ["名詞","助詞","動詞","接尾辞","助動詞","特殊","指示詞","判定詞","未定義語","形容詞","副詞","接頭辞","接続詞","連体詞","感動詞"]
  Category = ["人","組織・団体","動物","植物","動物-部位","植物-部位","人工物-食べ物","人工物-衣類","人工物-乗り物","人工物-金銭","人工物-その他","自然物","場所-施設","場所-施設部位","場所-自然","場所-機能","場所-その他","抽象物","形・模様","色","数量","時間"]
  Domain = ["文化・芸術","レクリエーション","スポーツ","健康・医学","家庭・暮らし","料理・食事","交通","教育・学習","科学・技術","ビジネス","メディア","政治"]

  attr_reader :ma_arr, :string, :id, :pos

  # @param string [String] UTF-8 text to analyze (left unmodified)
  # @param id [Object, nil] free-form tag for the caller's use (title, id, ...)
  # @param pos [Array<String>, String, nil] parts of speech to keep; nil keeps everything
  def initialize(string, id=nil, pos=nil)
    @id = id
    @string = string
    @pos = pos
    @ma_arr = ma(string)
    @specific_pos = words_of(pos) unless pos.nil?
  end

  # Collects one "column" across all parsed lines.
  #
  # @param i [Integer] 0..10 selects the i-th whitespace-separated field;
  #   17, 18, 19 and 20 select the values labelled 代表表記, 漢字読み,
  #   カテゴリ and ドメイン respectively ("" when a line has no such label)
  # @return [Array] one entry per parsed line, or [] for any other index
  def array_of(i)
    case i
    when 0..10 then @ma_arr.map { |e| e[i] }
    when 17 then info_column("代表表記")
    when 18 then info_column("漢字読み")
    when 19 then info_column("カテゴリ")
    when 20 then info_column("ドメイン")
    else []
    end
  end

  # Keeps only the lines whose field 3 equals one of the given parts of speech.
  # The result is grouped in the order the parts of speech were given and
  # REPLACES #ma_arr.
  #
  # @param pos [Array<String>, String] parts of speech to keep (a single
  #   String is accepted as a one-element list)
  # @return [Array<Array<String>>] the filtered lines
  def words_of(pos)
    @ma_arr = Array(pos).flat_map do |h|
      @ma_arr.select { |fields| h == fields[3] }
    end
  end

  private

  # Morphological analysis (ma): feeds the string to `juman -b`.
  #
  # The input is transcoded from UTF-8 to Windows-31J and every output line is
  # decoded from Windows-31J. Presumably this targets a Shift_JIS build of
  # JUMAN (e.g. on Windows) -- TODO confirm before using with a UTF-8 build.
  #
  # @param string [String] text to analyze; a transcoded copy is sent, so the
  #   caller's string (which may be frozen) is never modified
  # @return [Array<Array<String>>] output lines split on whitespace, without "EOS"
  def ma(string)
    input = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')

    out, _err, _status = Open3.capture3("juman -b", :stdin_data => input)
    out.each_line
       .map { |raw| decode_line(raw) }
       .reject { |line| line == "EOS" }
       .map { |line| line.split(/\s/) }
  rescue SystemCallError
    # Only a failure to launch the command is reported this way; any other
    # error propagates instead of being mislabelled as a PATH problem.
    print("[エラー]:JUMANへPathを通してください。\n")
    exit!
  end

  # Strips the line terminator (if any) and converts Windows-31J bytes to UTF-8,
  # dropping bytes that cannot be converted.
  def decode_line(raw)
    raw.chomp.encode("UTF-8", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
  end

  # Applies get_info with the given label to every parsed line.
  def info_column(label)
    @ma_arr.map { |e| get_info(e, label) }
  end

  # Extracts the value attached to a label such as "カテゴリ" or "漢字読み".
  #
  # @param e [Array<String>] one element of @ma_arr
  # @param what [String] label to look for (matched literally as "what:")
  # @return [String] the last matching field with the label and double quotes
  #   removed, or "" when no field carries the label
  def get_info(e, what)
    label = "#{what}:"
    hit = e.reverse_each.find { |elm| elm.include?(label) }
    hit ? hit.gsub(label, "").delete("\"") : ""
  end
end
95
+
96
+
97
# Runs the external `juman | knp -simple` pipeline on a string and keeps the
# output grouped in three ways (see #pa).
class Knp
  require 'open3'

  KNP_Version = "4.11"

  attr_reader :pa_arr, :string, :asitis, :id

  # @param string [String] UTF-8 text to parse (left unmodified)
  # @param id [Object, nil] free-form tag for the caller's use
  def initialize(string, id=nil)
    @id = id
    @string = string
    @pa_arr = pa(string)
    @asitis = @pa_arr[2]
  end

  # Parsing (pa) with KNP.
  #
  # The input is transcoded from UTF-8 to Windows-31J and every output line is
  # decoded from Windows-31J. Presumably this targets Shift_JIS builds of the
  # tools (e.g. on Windows) -- TODO confirm before using with UTF-8 builds.
  #
  # NOTE(review): the command contains a pipe, so a missing juman/knp binary
  # may show up as empty output rather than as a SystemCallError -- confirm,
  # and consider checking the exit status.
  #
  # @param string [String] text to parse; a transcoded copy is sent, so the
  #   caller's string (which may be frozen) is never modified
  # @param opttion [Object, nil] currently unused
  # @return [Array(Array, Array, Array)] three arrays:
  #   0. lines grouped per "*" header (Bunsetsu, 文節); "+" header lines are left out
  #   1. lines grouped per "+" header (Kihonku, 基本句); "*" header lines are left out
  #   2. every output line as is, without "EOS"
  def pa(string, opttion = nil)
    asitis = []
    paarr = [[]]
    kihonku = [[]]

    input = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')
    out, _err, _status = Open3.capture3("juman | knp -simple ", :stdin_data => input)

    # Index of the group currently being filled. While it is still -1 (no
    # header seen yet) lines land in the last, i.e. initial, group, which is
    # reset as soon as the first header arrives.
    i = -1
    j = -1
    out.each_line do |raw|
      line = raw.chomp.encode("UTF-8", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
      next if line == "EOS"

      asitis.push(line)
      marker = line.split(/\s/)[0]

      # Bunsetsu (文節) groups start at a "*" line.
      if marker == "*"
        i += 1
        paarr[i] = []
      end
      paarr[i].push(line) unless /^\+.+/ =~ line

      # Kihonku (基本句) groups start at a "+" line.
      if marker == "+"
        j += 1
        kihonku[j] = []
      end
      kihonku[j].push(line) unless /^\*.+/ =~ line
    end

    [paarr, kihonku, asitis]
  rescue SystemCallError
    # Only a failure to launch the command is reported this way; any other
    # error propagates instead of being mislabelled as a PATH problem.
    print("[エラー]:JUMANへPathを通してください。\n")
    exit!
  end

  # Dependency relations of Bunsetsu.
  #
  # Does not modify #pa_arr, so it can be called repeatedly with the same result.
  #
  # @return [Hash{Array<String> => Array<String>}] for each Bunsetsu group:
  #   key   = the lines following the group's header line,
  #   value = the header line split on whitespace, minus its first token.
  #   Groups without any line are skipped.
  def bunsetsu
    @pa_arr[0].each_with_object({}) do |(header, *morphemes), bunsetsu_hs|
      next if header.nil?

      bunsetsu_hs[morphemes] = header.split(/\s/).drop(1)
    end
  end
end
@@ -0,0 +1,170 @@
1
+ require "juman_knp/version"
2
+
3
+ # coding: utf-8
4
# Runs the external `juman -b` command on a string and keeps the parsed output.
#
# Each element of #ma_arr is one output line split on whitespace (the "EOS"
# terminator line is dropped).
class Juman
  require 'open3'

  Juman_Versin = "7.0"
  # Correctly spelled alias; the misspelled constant is kept for existing callers.
  Juman_Version = Juman_Versin

  Hinshi = ["名詞","助詞","動詞","接尾辞","助動詞","特殊","指示詞","判定詞","未定義語","形容詞","副詞","接頭辞","接続詞","連体詞","感動詞"]
  Category = ["人","組織・団体","動物","植物","動物-部位","植物-部位","人工物-食べ物","人工物-衣類","人工物-乗り物","人工物-金銭","人工物-その他","自然物","場所-施設","場所-施設部位","場所-自然","場所-機能","場所-その他","抽象物","形・模様","色","数量","時間"]
  Domain = ["文化・芸術","レクリエーション","スポーツ","健康・医学","家庭・暮らし","料理・食事","交通","教育・学習","科学・技術","ビジネス","メディア","政治"]

  attr_reader :ma_arr, :string, :id, :pos

  # @param string [String] UTF-8 text to analyze (left unmodified)
  # @param id [Object, nil] free-form tag for the caller's use (title, id, ...)
  # @param pos [Array<String>, String, nil] parts of speech to keep; nil keeps everything
  def initialize(string, id=nil, pos=nil)
    @id = id
    @string = string
    @pos = pos
    @ma_arr = ma(string)
    @specific_pos = words_of(pos) unless pos.nil?
  end

  # Collects one "column" across all parsed lines.
  #
  # @param i [Integer] 0..10 selects the i-th whitespace-separated field;
  #   17, 18, 19 and 20 select the values labelled 代表表記, 漢字読み,
  #   カテゴリ and ドメイン respectively ("" when a line has no such label)
  # @return [Array] one entry per parsed line, or [] for any other index
  def array_of(i)
    case i
    when 0..10 then @ma_arr.map { |e| e[i] }
    when 17 then info_column("代表表記")
    when 18 then info_column("漢字読み")
    when 19 then info_column("カテゴリ")
    when 20 then info_column("ドメイン")
    else []
    end
  end

  # Keeps only the lines whose field 3 equals one of the given parts of speech.
  # The result is grouped in the order the parts of speech were given and
  # REPLACES #ma_arr.
  #
  # @param pos [Array<String>, String] parts of speech to keep (a single
  #   String is accepted as a one-element list)
  # @return [Array<Array<String>>] the filtered lines
  def words_of(pos)
    @ma_arr = Array(pos).flat_map do |h|
      @ma_arr.select { |fields| h == fields[3] }
    end
  end

  private

  # Morphological analysis (ma): feeds the string to `juman -b`.
  #
  # The input is transcoded from UTF-8 to Windows-31J and every output line is
  # decoded from Windows-31J. Presumably this targets a Shift_JIS build of
  # JUMAN (e.g. on Windows) -- TODO confirm before using with a UTF-8 build.
  #
  # @param string [String] text to analyze; a transcoded copy is sent, so the
  #   caller's string (which may be frozen) is never modified
  # @return [Array<Array<String>>] output lines split on whitespace, without "EOS"
  def ma(string)
    input = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')

    out, _err, _status = Open3.capture3("juman -b", :stdin_data => input)
    out.each_line
       .map { |raw| decode_line(raw) }
       .reject { |line| line == "EOS" }
       .map { |line| line.split(/\s/) }
  rescue SystemCallError
    # Only a failure to launch the command is reported this way; any other
    # error propagates instead of being mislabelled as a PATH problem.
    print("[エラー]:JUMANへPathを通してください。\n")
    exit!
  end

  # Strips the line terminator (if any) and converts Windows-31J bytes to UTF-8,
  # dropping bytes that cannot be converted.
  def decode_line(raw)
    raw.chomp.encode("UTF-8", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
  end

  # Applies get_info with the given label to every parsed line.
  def info_column(label)
    @ma_arr.map { |e| get_info(e, label) }
  end

  # Extracts the value attached to a label such as "カテゴリ" or "漢字読み".
  #
  # @param e [Array<String>] one element of @ma_arr
  # @param what [String] label to look for (matched literally as "what:")
  # @return [String] the last matching field with the label and double quotes
  #   removed, or "" when no field carries the label
  def get_info(e, what)
    label = "#{what}:"
    hit = e.reverse_each.find { |elm| elm.include?(label) }
    hit ? hit.gsub(label, "").delete("\"") : ""
  end
end
95
+
96
+
97
# Runs the external `juman | knp -simple` pipeline on a string and keeps the
# output grouped in three ways (see #pa).
class Knp
  require 'open3'

  KNP_Version = "4.11"

  attr_reader :pa_arr, :string, :asitis, :id

  # @param string [String] UTF-8 text to parse (left unmodified)
  # @param id [Object, nil] free-form tag for the caller's use
  def initialize(string, id=nil)
    @id = id
    @string = string
    @pa_arr = pa(string)
    @asitis = @pa_arr[2]
  end

  # Parsing (pa) with KNP.
  #
  # The input is transcoded from UTF-8 to Windows-31J and every output line is
  # decoded from Windows-31J. Presumably this targets Shift_JIS builds of the
  # tools (e.g. on Windows) -- TODO confirm before using with UTF-8 builds.
  #
  # NOTE(review): the command contains a pipe, so a missing juman/knp binary
  # may show up as empty output rather than as a SystemCallError -- confirm,
  # and consider checking the exit status.
  #
  # @param string [String] text to parse; a transcoded copy is sent, so the
  #   caller's string (which may be frozen) is never modified
  # @param opttion [Object, nil] currently unused
  # @return [Array(Array, Array, Array)] three arrays:
  #   0. lines grouped per "*" header (Bunsetsu, 文節); "+" header lines are left out
  #   1. lines grouped per "+" header (Kihonku, 基本句); "*" header lines are left out
  #   2. every output line as is, without "EOS"
  def pa(string, opttion = nil)
    asitis = []
    paarr = [[]]
    kihonku = [[]]

    input = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')
    out, _err, _status = Open3.capture3("juman | knp -simple ", :stdin_data => input)

    # Index of the group currently being filled. While it is still -1 (no
    # header seen yet) lines land in the last, i.e. initial, group, which is
    # reset as soon as the first header arrives.
    i = -1
    j = -1
    out.each_line do |raw|
      line = raw.chomp.encode("UTF-8", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
      next if line == "EOS"

      asitis.push(line)
      marker = line.split(/\s/)[0]

      # Bunsetsu (文節) groups start at a "*" line.
      if marker == "*"
        i += 1
        paarr[i] = []
      end
      paarr[i].push(line) unless /^\+.+/ =~ line

      # Kihonku (基本句) groups start at a "+" line.
      if marker == "+"
        j += 1
        kihonku[j] = []
      end
      kihonku[j].push(line) unless /^\*.+/ =~ line
    end

    [paarr, kihonku, asitis]
  rescue SystemCallError
    # Only a failure to launch the command is reported this way; any other
    # error propagates instead of being mislabelled as a PATH problem.
    print("[エラー]:JUMANへPathを通してください。\n")
    exit!
  end

  # Dependency relations of Bunsetsu.
  #
  # Does not modify #pa_arr, so it can be called repeatedly with the same result.
  #
  # @return [Hash{Array<String> => Array<String>}] for each Bunsetsu group:
  #   key   = the lines following the group's header line,
  #   value = the header line split on whitespace, minus its first token.
  #   Groups without any line are skipped.
  def bunsetsu
    @pa_arr[0].each_with_object({}) do |(header, *morphemes), bunsetsu_hs|
      next if header.nil?

      bunsetsu_hs[morphemes] = header.split(/\s/).drop(1)
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace for gem-level metadata of juman_knp.
module JumanKnp
  # Release version of the gem; the gemspec reads this as its version.
  VERSION = "0.0.1"
end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the juman_knp gem.
describe JumanKnp do
  it 'has a version number' do
    expect(JumanKnp::VERSION).not_to be nil
  end

  # The generated placeholder asserted `false == true`, which made the default
  # rake task fail unconditionally. It is skipped (and still reported) until
  # real examples for Juman and Knp are written.
  it 'does something useful' do
    skip 'placeholder example: replace with real specs for Juman and Knp'
  end
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'juman_knp'
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: juman_knp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - rilmayer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: You can use JUMAN and KNP for natural language processing, by Ruby.
56
+ email:
57
+ - git@frsw.net
58
+ executables:
59
+ - juman_knp
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - Gemfile
65
+ - LICENSE
66
+ - README.md
67
+ - Rakefile
68
+ - bin/juman_knp
69
+ - juman_knp.gemspec
70
+ - lib/juman_knp.rb
71
+ - lib/juman_knp.rb~
72
+ - lib/juman_knp/version.rb
73
+ - spec/juman_knp_spec.rb
74
+ - spec/spec_helper.rb
75
+ homepage: http://frsw.net/
76
+ licenses:
77
+ - MIT
78
+ metadata: {}
79
+ post_install_message:
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 2.2.2
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: wrapper of JUMAN and KNP.
99
+ test_files:
100
+ - spec/juman_knp_spec.rb
101
+ - spec/spec_helper.rb