juman_knp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 23f6aff424c726c227a527483945be9f3c9d434e
4
+ data.tar.gz: 645b90b022af79539701713554838c2c67feb2e5
5
+ SHA512:
6
+ metadata.gz: d4bfe2c9d60e20c22c6d9c823ce35d18630f84245aa2d4b55306e3b79e0b37ef8b84470f18fa61debb078928aa38177152da8634830581b467d4e879f15baece
7
+ data.tar.gz: 9fac5baebd623a8487160694a83ad3847824fb48ea81ce03b8dd5b68ee2cc652cbdb3308f5d3dd626612a5a4e270907d5aae14dc6c9cea697241217b252a17ab
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in juman_knp.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 FURUSAWA Tomohiro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,31 @@
1
+ # JumanKnp
2
+
3
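+ JumanKnp is a Ruby wrapper around the JUMAN and KNP command-line tools, so that both can be used for natural language processing from Ruby.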
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'juman_knp'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install juman_knp
20
+
21
+ ## Usage
22
+
23
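+ The `juman` and `knp` commands must be on your `PATH`. After `require 'juman_knp'`, call `Juman.new(text)` and use `array_of(i)` to collect the i-th field of every analysed line, or call `Knp.new(text).bunsetsu` to get the bunsetsu dependency information. (TODO: add worked examples.)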
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/juman_knp/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
@@ -0,0 +1,7 @@
1
# Rake entry point for the gem: Bundler's packaging tasks plus the RSpec suite.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Defines the `spec` task, which runs the RSpec examples.
RSpec::Core::RakeTask.new(:spec)

# Running plain `rake` runs the specs.
task default: :spec
7
+
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'juman_knp'
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for juman_knp.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'juman_knp/version'

Gem::Specification.new do |spec|
  spec.name          = "juman_knp"
  spec.version       = JumanKnp::VERSION
  spec.authors       = ["rilmayer"]
  spec.email         = ["git@frsw.net"]
  spec.summary       = %q{wrapper of JUMAN and KNP.}
  spec.description   = %q{You can use JUMAN and KNP for natural language processing, by Ruby.}
  spec.homepage      = "http://frsw.net/"
  spec.license       = "MIT"

  # Package every tracked file except editor backups (names ending in "~"),
  # which must not ship even when they were committed by accident.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.end_with?("~") }
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
end
@@ -0,0 +1,170 @@
1
+ require "juman_knp/version"
2
+
3
+ # coding: utf-8
4
# Wrapper around the JUMAN command-line morphological analyser.
#
# The text is piped to `juman -b`; every output line except the "EOS"
# terminator is split on whitespace and stored as one array of fields in
# +ma_arr+.
class Juman
  require 'open3'

  Juman_Versin = "7.0"
  # Correctly spelled alias; the misspelled name is kept for existing callers.
  Juman_Version = Juman_Versin

  Hinshi = ["名詞","助詞","動詞","接尾辞","助動詞","特殊","指示詞","判定詞","未定義語","形容詞","副詞","接頭辞","接続詞","連体詞","感動詞"]
  Category = ["人","組織・団体","動物","植物","動物-部位","植物-部位","人工物-食べ物","人工物-衣類","人工物-乗り物","人工物-金銭","人工物-その他","自然物","場所-施設","場所-施設部位","場所-自然","場所-機能","場所-その他","抽象物","形・模様","色","数量","時間"]
  Domain = ["文化・芸術","レクリエーション","スポーツ","健康・医学","家庭・暮らし","料理・食事","交通","教育・学習","科学・技術","ビジネス","メディア","政治"]

  attr_reader :ma_arr, :string, :id, :pos

  # @param string [String] UTF-8 text to analyse; it is not modified
  # @param id [Object, nil] free-form label for the caller (title, id, ...)
  # @param pos [Array<String>, nil] parts of speech to keep; nil keeps every line
  def initialize(string, id=nil, pos=nil)
    @id = id
    @string = string
    @pos = pos
    @ma_arr = ma(string)
    @specific_pos = words_of(pos) unless pos.nil?
  end

  # Collects one piece of information from every analysed line.
  #
  # @param i [Integer] 0..10 selects the raw field at that index; 17, 18, 19
  #   and 20 select the value labelled "代表表記", "漢字読み", "カテゴリ" and
  #   "ドメイン" respectively ("" when a line has no such label)
  # @return [Array] one entry per line of +ma_arr+, or [] for any other index
  def array_of(i)
    case i
    when 0..10 then @ma_arr.map { |fields| fields[i] }
    when 17 then info_column("代表表記")
    when 18 then info_column("漢字読み")
    when 19 then info_column("カテゴリ")
    when 20 then info_column("ドメイン")
    else []
    end
  end

  # Keeps only the lines whose field at index 3 equals one of the given parts
  # of speech. The result is grouped in the order of +pos+, not in sentence
  # order, and it REPLACES +ma_arr+.
  #
  # @param pos [Array<String>] parts of speech to keep
  # @return [Array<Array<String>>] the filtered lines (the new +ma_arr+)
  def words_of(pos)
    @ma_arr = pos.flat_map do |h|
      @ma_arr.select { |fields| h == fields[3] }
    end
  end

  private

  # Morphological analysis (ma): runs JUMAN on +string+.
  #
  # @param string [String] UTF-8 text
  # @return [Array<Array<String>>] whitespace-split output lines, without "EOS"
  def ma(string)
    # JUMAN is fed Shift_JIS (Windows-31J). Encode a copy so that the caller's
    # string (also exposed through #string) keeps its encoding and frozen
    # strings are accepted.
    sjis = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')

    begin
      out, _err, _status = Open3.capture3("juman -b", :stdin_data => sjis)
    rescue SystemCallError
      # Only a failure to launch the command is reported as a PATH problem;
      # unrelated errors propagate instead of being mislabelled.
      print("[エラー]:JUMANへPathを通してください。\n")
      exit!
    end

    parse_output(out)
  end

  # Decodes JUMAN's Windows-31J output to UTF-8 and splits each line into fields.
  def parse_output(out)
    out.each_line.each_with_object([]) do |raw, rows|
      # chomp (not chomp!): chomp! returns nil when there is no trailing newline.
      line = raw.chomp
                .encode("UTF-16BE", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
                .encode("UTF-8")
      rows.push(line.split(/\s/)) unless line == "EOS"
    end
  end

  # Applies get_info with the given label to every analysed line.
  def info_column(label)
    @ma_arr.map { |fields| get_info(fields, label) }
  end

  # Finds the field containing "<what>:" and returns it with that marker and
  # any double quotes removed. When several fields match, the last one wins.
  #
  # @param e [Array<String>] one line of +ma_arr+
  # @param what [String] label such as "カテゴリ" or "漢字読み"
  # @return [String] the labelled value, or "" when the label is absent
  def get_info(e, what)
    marker = "#{what}:"
    info = ""
    e.each do |elm|
      info = elm.gsub(marker, "").delete("\"") if elm.include?(marker)
    end
    info
  end
end
95
+
96
+
97
# Wrapper around the `juman | knp -simple` command pipeline.
#
# The pipeline output is kept three ways in +pa_arr+: grouped by the lines
# that start with "*" (bunsetsu), grouped by the lines that start with "+"
# (kihonku), and as the flat list of output lines (+asitis+).
class Knp
  require 'open3'

  KNP_Version = "4.11"

  attr_reader :pa_arr, :string, :asitis, :id

  # @param string [String] UTF-8 text to parse; it is not modified
  # @param id [Object, nil] free-form label for the caller
  def initialize(string, id=nil)
    @id = id
    @string = string
    @pa_arr = pa(string)
    @asitis = @pa_arr[2]
  end

  # Parsing (pa) with KNP.
  #
  # @param string [String] UTF-8 text
  # @param opttion [Object] unused; kept for backward compatibility
  # @return [Array(Array, Array, Array)] bunsetsu groups, kihonku groups and
  #   the raw output lines; the "EOS" line is excluded from all three
  def pa(string, opttion = nil)
    asitis = []
    paarr = [[]]
    kihonku = [[]]

    # The pipeline is fed Shift_JIS (Windows-31J). Encode a copy so that the
    # caller's string keeps its encoding and frozen strings are accepted.
    sjis = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')

    begin
      out, _err, _status = Open3.capture3("juman | knp -simple ", :stdin_data => sjis)
    rescue SystemCallError
      # Only a failure to launch the command is reported as a PATH problem.
      print("[エラー]:JUMANへPathを通してください。\n")
      exit!
    end

    i = -1
    j = -1
    out.each_line do |raw|
      # chomp (not chomp!): chomp! returns nil when there is no trailing newline.
      line = raw.chomp
                .encode("UTF-16BE", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
                .encode("UTF-8")
      next if line == "EOS"

      asitis.push(line)
      marker = line.split(/\s/)[0]

      # A "*" line opens a new bunsetsu group. Lines seen before the first
      # "*" land in the initial placeholder group (index -1 is the last
      # element), which the first "*" then replaces.
      if marker == "*"
        i += 1
        paarr[i] = []
      end
      paarr[i].push(line) unless /^\+.+/ =~ line

      # A "+" line opens a new kihonku group, with the same placeholder rule.
      if marker == "+"
        j += 1
        kihonku[j] = []
      end
      kihonku[j].push(line) unless /^\*.+/ =~ line
    end

    return paarr, kihonku, asitis
  end

  # Dependency information of each bunsetsu.
  #
  # Does not modify +pa_arr+, so repeated calls return equal results.
  #
  # @return [Hash{Array<String> => Array<String>}] key: the lines that follow
  #   the group's first line; value: the whitespace-separated fields of the
  #   first line without its leading field (the "*" marker). Groups with
  #   identical following lines share one key, so the later one wins.
  def bunsetsu
    @pa_arr[0].each_with_object({}) do |group, result|
      header, *rest = group
      next if header.nil? # empty placeholder group when nothing was parsed

      result[rest] = header.split(/\s/).drop(1)
    end
  end
end
@@ -0,0 +1,170 @@
1
+ require "juman_knp/version"
2
+
3
+ # coding: utf-8
4
# Wrapper around the JUMAN command-line morphological analyser.
#
# The text is piped to `juman -b`; every output line except the "EOS"
# terminator is split on whitespace and stored as one array of fields in
# +ma_arr+.
class Juman
  require 'open3'

  Juman_Versin = "7.0"
  # Correctly spelled alias; the misspelled name is kept for existing callers.
  Juman_Version = Juman_Versin

  Hinshi = ["名詞","助詞","動詞","接尾辞","助動詞","特殊","指示詞","判定詞","未定義語","形容詞","副詞","接頭辞","接続詞","連体詞","感動詞"]
  Category = ["人","組織・団体","動物","植物","動物-部位","植物-部位","人工物-食べ物","人工物-衣類","人工物-乗り物","人工物-金銭","人工物-その他","自然物","場所-施設","場所-施設部位","場所-自然","場所-機能","場所-その他","抽象物","形・模様","色","数量","時間"]
  Domain = ["文化・芸術","レクリエーション","スポーツ","健康・医学","家庭・暮らし","料理・食事","交通","教育・学習","科学・技術","ビジネス","メディア","政治"]

  attr_reader :ma_arr, :string, :id, :pos

  # @param string [String] UTF-8 text to analyse; it is not modified
  # @param id [Object, nil] free-form label for the caller (title, id, ...)
  # @param pos [Array<String>, nil] parts of speech to keep; nil keeps every line
  def initialize(string, id=nil, pos=nil)
    @id = id
    @string = string
    @pos = pos
    @ma_arr = ma(string)
    @specific_pos = words_of(pos) unless pos.nil?
  end

  # Collects one piece of information from every analysed line.
  #
  # @param i [Integer] 0..10 selects the raw field at that index; 17, 18, 19
  #   and 20 select the value labelled "代表表記", "漢字読み", "カテゴリ" and
  #   "ドメイン" respectively ("" when a line has no such label)
  # @return [Array] one entry per line of +ma_arr+, or [] for any other index
  def array_of(i)
    case i
    when 0..10 then @ma_arr.map { |fields| fields[i] }
    when 17 then info_column("代表表記")
    when 18 then info_column("漢字読み")
    when 19 then info_column("カテゴリ")
    when 20 then info_column("ドメイン")
    else []
    end
  end

  # Keeps only the lines whose field at index 3 equals one of the given parts
  # of speech. The result is grouped in the order of +pos+, not in sentence
  # order, and it REPLACES +ma_arr+.
  #
  # @param pos [Array<String>] parts of speech to keep
  # @return [Array<Array<String>>] the filtered lines (the new +ma_arr+)
  def words_of(pos)
    @ma_arr = pos.flat_map do |h|
      @ma_arr.select { |fields| h == fields[3] }
    end
  end

  private

  # Morphological analysis (ma): runs JUMAN on +string+.
  #
  # @param string [String] UTF-8 text
  # @return [Array<Array<String>>] whitespace-split output lines, without "EOS"
  def ma(string)
    # JUMAN is fed Shift_JIS (Windows-31J). Encode a copy so that the caller's
    # string (also exposed through #string) keeps its encoding and frozen
    # strings are accepted.
    sjis = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')

    begin
      out, _err, _status = Open3.capture3("juman -b", :stdin_data => sjis)
    rescue SystemCallError
      # Only a failure to launch the command is reported as a PATH problem;
      # unrelated errors propagate instead of being mislabelled.
      print("[エラー]:JUMANへPathを通してください。\n")
      exit!
    end

    parse_output(out)
  end

  # Decodes JUMAN's Windows-31J output to UTF-8 and splits each line into fields.
  def parse_output(out)
    out.each_line.each_with_object([]) do |raw, rows|
      # chomp (not chomp!): chomp! returns nil when there is no trailing newline.
      line = raw.chomp
                .encode("UTF-16BE", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
                .encode("UTF-8")
      rows.push(line.split(/\s/)) unless line == "EOS"
    end
  end

  # Applies get_info with the given label to every analysed line.
  def info_column(label)
    @ma_arr.map { |fields| get_info(fields, label) }
  end

  # Finds the field containing "<what>:" and returns it with that marker and
  # any double quotes removed. When several fields match, the last one wins.
  #
  # @param e [Array<String>] one line of +ma_arr+
  # @param what [String] label such as "カテゴリ" or "漢字読み"
  # @return [String] the labelled value, or "" when the label is absent
  def get_info(e, what)
    marker = "#{what}:"
    info = ""
    e.each do |elm|
      info = elm.gsub(marker, "").delete("\"") if elm.include?(marker)
    end
    info
  end
end
95
+
96
+
97
# Wrapper around the `juman | knp -simple` command pipeline.
#
# The pipeline output is kept three ways in +pa_arr+: grouped by the lines
# that start with "*" (bunsetsu), grouped by the lines that start with "+"
# (kihonku), and as the flat list of output lines (+asitis+).
class Knp
  require 'open3'

  KNP_Version = "4.11"

  attr_reader :pa_arr, :string, :asitis, :id

  # @param string [String] UTF-8 text to parse; it is not modified
  # @param id [Object, nil] free-form label for the caller
  def initialize(string, id=nil)
    @id = id
    @string = string
    @pa_arr = pa(string)
    @asitis = @pa_arr[2]
  end

  # Parsing (pa) with KNP.
  #
  # @param string [String] UTF-8 text
  # @param opttion [Object] unused; kept for backward compatibility
  # @return [Array(Array, Array, Array)] bunsetsu groups, kihonku groups and
  #   the raw output lines; the "EOS" line is excluded from all three
  def pa(string, opttion = nil)
    asitis = []
    paarr = [[]]
    kihonku = [[]]

    # The pipeline is fed Shift_JIS (Windows-31J). Encode a copy so that the
    # caller's string keeps its encoding and frozen strings are accepted.
    sjis = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')

    begin
      out, _err, _status = Open3.capture3("juman | knp -simple ", :stdin_data => sjis)
    rescue SystemCallError
      # Only a failure to launch the command is reported as a PATH problem.
      print("[エラー]:JUMANへPathを通してください。\n")
      exit!
    end

    i = -1
    j = -1
    out.each_line do |raw|
      # chomp (not chomp!): chomp! returns nil when there is no trailing newline.
      line = raw.chomp
                .encode("UTF-16BE", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
                .encode("UTF-8")
      next if line == "EOS"

      asitis.push(line)
      marker = line.split(/\s/)[0]

      # A "*" line opens a new bunsetsu group. Lines seen before the first
      # "*" land in the initial placeholder group (index -1 is the last
      # element), which the first "*" then replaces.
      if marker == "*"
        i += 1
        paarr[i] = []
      end
      paarr[i].push(line) unless /^\+.+/ =~ line

      # A "+" line opens a new kihonku group, with the same placeholder rule.
      if marker == "+"
        j += 1
        kihonku[j] = []
      end
      kihonku[j].push(line) unless /^\*.+/ =~ line
    end

    return paarr, kihonku, asitis
  end

  # Dependency information of each bunsetsu.
  #
  # Does not modify +pa_arr+, so repeated calls return equal results.
  #
  # @return [Hash{Array<String> => Array<String>}] key: the lines that follow
  #   the group's first line; value: the whitespace-separated fields of the
  #   first line without its leading field (the "*" marker). Groups with
  #   identical following lines share one key, so the later one wins.
  def bunsetsu
    @pa_arr[0].each_with_object({}) do |group, result|
      header, *rest = group
      next if header.nil? # empty placeholder group when nothing was parsed

      result[rest] = header.split(/\s/).drop(1)
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace that carries the gem's version number.
module JumanKnp
  # Frozen so that the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,11 @@
1
require 'spec_helper'

describe JumanKnp do
  it 'has a version number' do
    expect(JumanKnp::VERSION).not_to be nil
  end

  # Uses a pre-filled instance so the example does not need the external
  # `juman` command to be installed.
  it 'reads fields from analysed lines and filters them by part of speech' do
    juman = Juman.allocate
    juman.instance_variable_set(:@ma_arr, [%w[本 ほん 本 名詞], %w[を を を 助詞]])

    expect(juman.array_of(0)).to eq(%w[本 を])
    expect(juman.words_of(['名詞'])).to eq([%w[本 ほん 本 名詞]])
    expect(juman.ma_arr).to eq([%w[本 ほん 本 名詞]])
  end
end
11
+ end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'juman_knp'
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: juman_knp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - rilmayer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: You can use JUMAN and KNP for natural language processing, by Ruby.
56
+ email:
57
+ - git@frsw.net
58
+ executables:
59
+ - juman_knp
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - Gemfile
65
+ - LICENSE
66
+ - README.md
67
+ - Rakefile
68
+ - bin/juman_knp
69
+ - juman_knp.gemspec
70
+ - lib/juman_knp.rb
71
+ - lib/juman_knp.rb~
72
+ - lib/juman_knp/version.rb
73
+ - spec/juman_knp_spec.rb
74
+ - spec/spec_helper.rb
75
+ homepage: http://frsw.net/
76
+ licenses:
77
+ - MIT
78
+ metadata: {}
79
+ post_install_message:
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 2.2.2
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: wrapper of JUMAN and KNP.
99
+ test_files:
100
+ - spec/juman_knp_spec.rb
101
+ - spec/spec_helper.rb