juman 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cdf875c27933e91b0074f8600918e870c2ac06c0
4
+ data.tar.gz: 32f94ccb320b7c46216293a329dd30432a3678d5
5
+ SHA512:
6
+ metadata.gz: e0653e988f396fd83bd908e280feb4f07518daea65d019b99d8fa5a8f770a1e1a6ca292ad522a4ad766476589adca7c9dbaade22b3cbc268f11633a0528fdb19
7
+ data.tar.gz: 755aa941d413a7980f4aa7cd9a849674ad3713b7049a031c1972ccfdf7820b867a52187d675754e299adb6e57e02f3271017b7d8a84576592abffe05ffaffa8d
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in juman.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Hajime WAKAHARA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # Juman [![Build Status](https://travis-ci.org/hadzimme/juman.png)](https://travis-ci.org/hadzimme/juman)
2
+
3
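+ Use a sequence of morphemes as an Enumerable object. Requires the `juman` command (the JUMAN morphological analyzer) to be installed and available on your `PATH`.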
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'juman'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install juman
18
+
19
+ ## Usage
20
+
21
+ ```ruby
22
+ require 'juman'
23
+
24
+ juman = Juman.new
25
+ text = 'この文を形態素解析してください。'
26
+ result = juman.analyze(text)
27
+ result[0].surface #=> "この"
28
+ result.map{|morpheme| morpheme.surface }
29
+ #=> ["この", "文", "を", "形態", "素", "解析", "して", "ください", "。"]
30
+ ```
31
+
32
+ ## Contributing
33
+
34
+ 1. Fork it
35
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
36
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
37
+ 4. Push to the branch (`git push origin my-new-feature`)
38
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,11 @@
require "bundler/gem_tasks"

Bundler.setup
require 'rspec/core/rake_task'

# Defines the `spec` rake task, which runs the RSpec suite with colored,
# documentation-style output, and makes it the default task.
desc 'run spec'
RSpec::Core::RakeTask.new(:spec) do |t|
  # "-fd" (documentation formatter) replaces the former "-fs": the "s"
  # alias was dropped in RSpec 3, while "d" is accepted by RSpec 2 and 3.
  # The gemspec does not pin an rspec version, so either may be installed.
  t.rspec_opts = ["-c", "-fd"]
end

task :default => :spec
data/juman.gemspec ADDED
@@ -0,0 +1,22 @@
# encoding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'juman/version'

# Gem specification for juman. The version is read from Juman::VERSION and
# the packaged file list is taken from the files tracked by git.
Gem::Specification.new do |gem|
  gem.name          = "juman"
  gem.version       = Juman::VERSION
  gem.authors       = ["Hajime WAKAHARA"]
  gem.email         = ["hajime.wakahara@gmail.com"]
  gem.description   = %q{Use a sequence of morphemes as an Enumerable object.}
  gem.summary       = %q{A Wrapper for JUMAN: A Morphological Analyzer}
  gem.homepage      = "https://github.com/hadzimme/juman"
  # LICENSE.txt ships the MIT license; declare it so the published gem
  # metadata no longer has an empty license list.
  gem.license       = "MIT"

  # NUL-delimited listing: unlike splitting on the record separator, this
  # stays correct for tracked paths that contain unusual characters.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'rspec'
end
@@ -0,0 +1,32 @@
class Juman
  # One morpheme parsed from a single whitespace-separated line of analyzer
  # output. Fields are read positionally, in this order:
  #
  #   surface pronunciation base pos pos_id pos_spec pos_spec_id
  #   type type_id form form_id info...
  #
  # The *_id fields are converted with to_i, a '*' in pos_spec/type/form
  # becomes nil, and everything from the twelfth field on is re-joined with
  # single spaces to form +info+.
  class Morpheme
    attr_reader :surface, :pronunciation, :base, :pos, :pos_id, :pos_spec,
                :pos_spec_id, :type, :type_id, :form, :form_id, :info

    # @param line [String] one line of analyzer output (a trailing newline
    #   is tolerated). Missing trailing fields yield nil attributes (0 for
    #   the *_id fields) instead of raising.
    def initialize(line)
      attributes = line.split(/\s/)
      @surface, @pronunciation, @base, @pos = attributes.shift(4)
      @pos_id, @pos_spec_id, @type_id, @form_id =
        attributes.values_at(0, 2, 4, 6).map { |id_str| id_str.to_i }
      @pos_spec, @type, @form =
        attributes.values_at(1, 3, 5).map { |attr| normalize_attr(attr) }
      # drop(7) returns [] for short lines, where [7..-1] would return nil.
      @info = normalize_info(attributes.drop(7).join(' '))
    end

    private

    # Maps the '*' placeholder to nil; any other value is returned as is.
    def normalize_attr(candidate)
      candidate == '*' ? nil : candidate
    end

    # Turns the raw info field into a String, or nil when it is empty or
    # the literal NIL.
    #
    # SECURITY: this previously called eval on the field, which executes any
    # "#{...}" the analyzer output happens to contain. A double-quoted field
    # is now unwrapped by hand (only \" and \\ are unescaped); anything not
    # wrapped in double quotes is returned unchanged.
    def normalize_info(candidate)
      return nil if candidate.empty? || candidate == 'NIL'

      quoted = /\A"(.*)"\z/m.match(candidate)
      return candidate unless quoted

      quoted[1].gsub(/\\(["\\])/) { Regexp.last_match(1) }
    end
  end
end
@@ -0,0 +1,16 @@
class Juman
  # Wraps a child process opened with IO.popen in read/write mode and
  # exchanges text with it line by line.
  class Process
    # @param command [String] command line handed to IO.popen
    def initialize(command)
      @io = IO.popen(command, 'r+')
    end

    # Writes +text+ to the child and returns an Enumerator over the lines
    # the child prints, up to (not including) the first line matching
    # /^EOS$/.
    #
    # The text is written immediately; the reply is only read when the
    # returned Enumerator is iterated.
    #
    # @param text [String] input passed to the child with IO#puts
    # @return [Enumerator] yields each reply line, newline included
    # @raise [EOFError] during iteration, if the child's output ends before
    #   an EOS line is seen (previously a NoMethodError on nil)
    def parse_to_enum(text)
      @io.puts(text)
      @io.flush
      Enumerator.new do |y|
        loop do
          line = @io.gets
          # gets returns nil at end of stream, i.e. the child went away.
          raise EOFError, 'analyzer output ended before EOS' if line.nil?
          break if line =~ /^EOS$/

          y << line
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
class Juman
  # Enumerable collection of Morpheme objects built from analyzer output
  # lines.
  class Result
    include Enumerable

    # @param lines [#map] raw output lines; each one is passed to
    #   Morpheme.new right away, so a lazy source is consumed here.
    def initialize(lines)
      @morphemes = lines.map { |line| Morpheme.new(line) }
    end

    # Yields every morpheme in order.
    #
    # @return [Result] self when a block is given
    # @return [Enumerator] when called without a block
    def each(&block)
      return to_enum unless block_given?

      @morphemes.each(&block)
      self
    end

    # Element reference, delegated to the underlying Array. Besides a single
    # index (the only form supported before), a range or a start/length pair
    # is accepted and then returns an Array of morphemes.
    #
    # @return [Morpheme, Array<Morpheme>, nil]
    def [](*args)
      @morphemes[*args]
    end

    alias at []
  end
end
@@ -0,0 +1,3 @@
class Juman
  # Version string of the gem (juman.gemspec reads it as Juman::VERSION).
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
data/lib/juman.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'juman/version'
2
+ require 'juman/process'
3
+ require 'juman/result'
4
+ require 'juman/morpheme'
5
+
# Entry point of the gem: keeps one analyzer child process and turns its
# output into Result objects.
class Juman
  # Command line used when none is given (the value that used to be
  # hard-coded in #initialize).
  DEFAULT_COMMAND = 'juman -B -e2'.freeze

  # @param command [String] command line used to start the analyzer;
  #   defaults to DEFAULT_COMMAND, so Juman.new behaves as before.
  def initialize(command = DEFAULT_COMMAND)
    @process = Process.new(command)
  end

  # Sends +text+ to the analyzer process and wraps the reply.
  #
  # @param text [String] text to analyze
  # @return [Juman::Result]
  def analyze(text)
    Result.new(@process.parse_to_enum(text))
  end
end
data/spec/bin/juman ADDED
@@ -0,0 +1,15 @@
#!/usr/bin/env ruby
# coding: utf-8

require 'optparse'

# Stand-in for the real analyzer executable, used by the specs: accepts the
# -e2 and -B switches, reads one line of input and prints a fixed reply
# terminated by an EOS line.
parser = OptionParser.new
%w[-e2 -B].each { |switch| parser.on(switch) }
parser.parse!(ARGV)

# Wait for (and discard) one line of input before replying.
gets

canned_reply = <<JUMAN
見る みる 見る 動詞 2 * 0 母音動詞 1 基本形 2 "代表表記:見る/みる 補文ト 自他動詞:自:見える/みえる"
EOS
JUMAN
puts canned_reply
@@ -0,0 +1,92 @@
# coding: utf-8

require 'rspec'
require 'juman'

# Put the stub executable in spec/bin first on PATH so the `juman` command
# started by the library resolves to it.
ENV['PATH'] = "#{File.expand_path(File.dirname(__FILE__))}/bin:#{ENV['PATH']}"

# NOTE: expectations use the `expect(...).to` form rather than `its` and
# `should`. `its` is no longer part of rspec-core 3 and `should` is
# deprecated there; `expect` works on RSpec 2.11+ and 3, which matters
# because the gemspec does not pin an rspec version.

describe Juman::Morpheme do
  context 'when initialized with a line of the result of "見る"' do
    subject { Juman::Morpheme.new(
      "見る みる 見る 動詞 2 * 0 母音動詞 1 基本形 2 \"情 報\"\n") }
    it { expect(subject.surface).to eq '見る' }
    it { expect(subject.pronunciation).to eq 'みる' }
    it { expect(subject.base).to eq '見る' }
    it { expect(subject.pos).to eq '動詞' }
    it { expect(subject.pos_id).to be 2 }
    it { expect(subject.pos_spec).to be_nil }
    it { expect(subject.pos_spec_id).to be 0 }
    it { expect(subject.type).to eq '母音動詞' }
    it { expect(subject.type_id).to be 1 }
    it { expect(subject.form).to eq '基本形' }
    it { expect(subject.form_id).to be 2 }
    it { expect(subject.info).to eq '情 報' }
  end
end

describe Juman::Result do
  context 'when initialized with an Enumerator of the result of "見る"' do
    before { @result = Juman::Result.new(
      ["見る みる 見る 動詞 2 * 0 母音動詞 1 基本形 2 \"情 報\""].to_enum) }
    subject { @result }
    it { expect(subject).to be_an Enumerable }
    it { expect(subject).to respond_to :each }
    it { expect(subject).to respond_to :[] }
    it { expect(subject).to respond_to :at }
    describe '#[]' do
      context 'when argument 0' do
        subject { @result[0] }
        it 'should return Juman::Morpheme' do
          expect(subject).to be_an_instance_of Juman::Morpheme
        end
      end
    end
    describe '#each' do
      context 'without block' do
        subject { @result.each }
        it 'should return Enumerator' do
          expect(subject).to be_an_instance_of Enumerator
        end
      end
      context 'with block' do
        subject { @result.each{} }
        it 'should return self' do
          expect(subject).to be @result
        end
      end
    end
  end
end

describe Juman::Process do
  before { @process = Juman::Process.new('juman -e2 -B') }
  describe '#parse_to_enum' do
    context 'when argument "見る"' do
      subject { @process.parse_to_enum('見る') }
      it 'should return Enumerator' do
        expect(subject).to be_an_instance_of Enumerator
      end
    end
  end
end

describe Juman do
  before { @juman = Juman.new }
  subject { @juman }
  it { expect(subject).to respond_to :analyze }
  describe '#analyze' do
    context 'when argument "見る"' do
      before { @result = @juman.analyze('見る') }
      it 'should return Juman::Result' do
        expect(@result).to be_an_instance_of Juman::Result
      end
      describe 'returned Juman::Result' do
        subject { @result }
        describe '#[]' do
          context 'when argument 0' do
            it 'should return Juman::Morpheme' do
              expect(@result[0]).to be_an_instance_of Juman::Morpheme
            end
          end
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: juman
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Hajime WAKAHARA
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-03-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Use a sequence of morphemes as an Enumerable object.
42
+ email:
43
+ - hajime.wakahara@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - Gemfile
50
+ - LICENSE.txt
51
+ - README.md
52
+ - Rakefile
53
+ - juman.gemspec
54
+ - lib/juman.rb
55
+ - lib/juman/morpheme.rb
56
+ - lib/juman/process.rb
57
+ - lib/juman/result.rb
58
+ - lib/juman/version.rb
59
+ - spec/bin/juman
60
+ - spec/juman_spec.rb
61
+ homepage: https://github.com/hadzimme/juman
62
+ licenses: []
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.0.3
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: 'A Wrapper for JUMAN: A Morphological Analyzer'
84
+ test_files:
85
+ - spec/bin/juman
86
+ - spec/juman_spec.rb