chawan 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +45 -6
- data/Rakefile +1 -1
- data/lib/chawan/analyzers/chasen_analyzer.rb +2 -1
- data/lib/chawan/node.rb +13 -1
- data/lib/chawan/nodes.rb +72 -0
- data/lib/chawan.rb +1 -0
- data/spec/node_spec.rb +2 -0
- data/spec/nodes_spec.rb +68 -0
- data/spec/provide_helper.rb +36 -0
- data/spec/spec_helper.rb +1 -0
- metadata +5 -2
data/README
CHANGED
@@ -17,7 +17,13 @@ Methods
|
|
17
17
|
Class
|
18
18
|
=====
|
19
19
|
|
20
|
-
* Chawan::Node
|
20
|
+
* Chawan::Nodes (Chawan.parse returns a Chawan::Nodes)
|
21
|
+
#noun : scope category with noun
|
22
|
+
#verb : scope category with verb
|
23
|
+
#grep : scope category with given pattern
|
24
|
+
#compact : merge consecutive nodes that share the same category
|
25
|
+
|
26
|
+
* Chawan::Node (Chawan::Nodes has many Chawan::Node(s))
|
21
27
|
#category : part of speech
|
22
28
|
#word : text
|
23
29
|
#attributes : keys and vals hash
|
@@ -26,11 +32,45 @@ Class
|
|
26
32
|
Example
|
27
33
|
=======
|
28
34
|
|
29
|
-
|
30
|
-
|
35
|
+
text = '本日はバリ晴天なりっす'
|
36
|
+
|
37
|
+
# 'parse' returns a Chawan::Nodes
|
38
|
+
Chawan.parse(text)
|
39
|
+
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: 'バリ'>, <名詞: '晴天'>, <助動詞: 'なり'>, <助動詞: 'っす'>]
|
40
|
+
|
41
|
+
# Chawan::Nodes is enumerable
|
42
|
+
Chawan.parse(text).select{|node| node.category == '名詞'}
|
43
|
+
=> [<名詞: '本日'>, <名詞: 'バリ'>, <名詞: '晴天'>]
|
44
|
+
|
45
|
+
# gateway interface: noun
|
46
|
+
Chawan.parse(text).noun
|
47
|
+
=> [<名詞: '本日'>, <名詞: 'バリ'>, <名詞: '晴天'>]
|
48
|
+
|
49
|
+
# gateway interface: verb
|
50
|
+
Chawan.parse(text).verb
|
51
|
+
=> [<助動詞: 'なり'>, <助動詞: 'っす'>]
|
52
|
+
|
53
|
+
# gateway interface: grep
|
54
|
+
Chawan.parse(text).grep(/助/)
|
55
|
+
=> [<助詞: 'は'>, <助動詞: 'なり'>, <助動詞: 'っす'>]
|
56
|
+
|
57
|
+
# gateway interface: compact
|
58
|
+
Chawan.parse(text).compact
|
59
|
+
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: 'バリ晴天'>, <助動詞: 'なりっす'>]
|
60
|
+
Chawan.parse(text).compact(/名詞/)
|
61
|
+
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: 'バリ晴天'>, <助動詞: 'なり'>, <助動詞: 'っす'>]
|
62
|
+
|
63
|
+
# gateway interface is chainable
|
64
|
+
Chawan.parse(text).noun.verb
|
65
|
+
=> []
|
31
66
|
|
32
|
-
|
33
|
-
|
67
|
+
# chainable is fun!
|
68
|
+
Chawan.parse(text).noun
|
69
|
+
=> [<名詞: '本日'>, <名詞: 'バリ'>, <名詞: '晴天'>]
|
70
|
+
Chawan.parse(text).compact.noun
|
71
|
+
=> [<名詞: '本日'>, <名詞: 'バリ晴天'>]
|
72
|
+
Chawan.parse(text).noun.compact
|
73
|
+
=> [<名詞: '本日バリ晴天'>]
|
34
74
|
|
35
75
|
|
36
76
|
Analyzer
|
@@ -64,7 +104,6 @@ Required
|
|
64
104
|
Todo
|
65
105
|
====
|
66
106
|
|
67
|
-
* gateway interface to Chawan#parse such as grep, noun, ...
|
68
107
|
* use open3 rather than backquote for executing unix commands
|
69
108
|
|
70
109
|
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
|
|
6
6
|
EMAIL = "maiha@wota.jp"
|
7
7
|
HOMEPAGE = "http://github.com/maiha/chawan"
|
8
8
|
SUMMARY = "A cup for chasen that provides an easy to use for extracting Japanese"
|
9
|
-
GEM_VERSION = "0.0.2"
|
9
|
+
GEM_VERSION = "0.0.3"
|
10
10
|
|
11
11
|
spec = Gem::Specification.new do |s|
|
12
12
|
s.rubyforge_project = 'asakusarb'
|
@@ -10,7 +10,8 @@ module Chawan
|
|
10
10
|
lines = execute(text).split(/\n/)
|
11
11
|
lines.pop == "EOS" or # "EOS"
|
12
12
|
raise CannotAnalyze
|
13
|
-
|
13
|
+
nodes = lines.map{|line| instantiate(line, self.class::Fields)}
|
14
|
+
return Nodes.new(nodes)
|
14
15
|
end
|
15
16
|
|
16
17
|
private
|
data/lib/chawan/node.rb
CHANGED
@@ -1,8 +1,20 @@
|
|
1
1
|
module Chawan
|
2
2
|
class Node
|
3
|
-
|
3
|
+
attr_accessor :vals
|
4
4
|
attr_reader :keys
|
5
5
|
|
6
|
+
# Category predicates for a node.
#
# Each predicate is a substring test against the node's category (converted
# with to_s), so it returns the match offset (truthy) or nil rather than a
# strict boolean. Chawan::Nodes derives its scope names (noun, verb) from
# the "?"-suffixed methods defined here.
module GatewayInterface
  # Truthy when the category contains "名詞".
  def noun?
    /名詞/ =~ category.to_s
  end

  # Truthy when the category contains "動詞". Because this is a substring
  # test, it also matches categories such as "助動詞".
  def verb?
    /動詞/ =~ category.to_s
  end
end
|
15
|
+
|
16
|
+
include GatewayInterface
|
17
|
+
|
6
18
|
def initialize(vals, keys)
|
7
19
|
@vals = vals
|
8
20
|
@keys = keys
|
data/lib/chawan/nodes.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module Chawan
  # An enumerable collection of Chawan::Node objects.
  #
  # Besides Enumerable, it offers chainable "gateway" scopes (#grep, #compact
  # and one scope per predicate of Chawan::Node::GatewayInterface, e.g. #noun
  # and #verb) that each return a new Nodes. Any other unknown message is
  # forwarded to the wrapped node list.
  class Nodes
    include Enumerable

    # The wrapped list of nodes, as given to the constructor.
    attr_reader :nodes

    # nodes:: the collection to wrap (anything responding to each/select).
    def initialize(nodes)
      @nodes = nodes
    end

    # Two collections are equal when they have the same class and their
    # nodes have pairwise equal string representations.
    def ==(other)
      self.class == other.class && map(&:to_s) == other.map(&:to_s)
    end

    ######################################################################
    ### Enumerable

    # Yields each node in order (delegates to the wrapped list).
    def each(&block)
      nodes.each(&block)
    end

    # Array-like rendering: "[<node>, <node>, ...]".
    def inspect
      "[%s]" % map(&:inspect).join(', ')
    end

    ######################################################################
    ### GatewayInterface

    # Scopes the nodes to those whose category matches +pattern+ (compared
    # with ===, so a Regexp matches partially and a String must be equal).
    #
    # Without a block, returns a new Nodes. With a block, follows the
    # Enumerable#grep contract and returns an Array holding the block's
    # result for every matching node. (The block form used to raise
    # NameError because it referenced an undefined local.)
    def grep(pattern, &block)
      matched = nodes.select { |n| pattern === n.category }
      block ? matched.map(&block) : Nodes.new(matched)
    end

    # Merges runs of consecutive nodes that share the same category into a
    # single node whose word is the concatenation of the run. Only nodes
    # whose category matches +pattern+ are merged into their predecessor;
    # the default pattern matches every string category.
    #
    # Returns a new collection built from copies, so the receiver's nodes
    # are not modified.
    def compact(pattern = //)
      merged = []
      each do |node|
        tail = merged.last
        if tail && pattern === node.category && tail.category == node.category
          # Appends in place on the copy; this relies on Node#word returning
          # the string held in the copied vals (Node is defined elsewhere).
          tail.word << node.word.to_s
        else
          # Duplicate the values so the in-place append above never touches
          # the strings owned by the original node.
          merged << Node.new(node.vals.map(&:dup), node.keys)
        end
      end
      self.class.new(merged)
    end

    # Names of the available gateway scopes: every "?"-suffixed method of
    # Chawan::Node::GatewayInterface with the "?" removed (e.g. "noun").
    def self.gateway_interfaces
      Chawan::Node::GatewayInterface.instance_methods.map(&:to_s).
        select { |m| m.end_with?('?') }.
        map { |m| m.chomp('?') }
    end

    # Defines the scope method +name+, which returns a Nodes restricted to
    # the nodes answering truthy to "<name>?".
    def self.define_gateway_interface(name)
      predicate = "#{name}?"
      # define_method instead of string eval: no source text is built from
      # the caller-supplied name.
      define_method(name) do
        Nodes.new(nodes.select { |n| n.__send__(predicate) })
      end
    end

    private

    # Lazily defines a gateway scope the first time it is called; every
    # other unknown message is forwarded to the wrapped node list.
    def method_missing(name, *arguments, &block)
      if self.class.gateway_interfaces.include?(name.to_s)
        self.class.define_gateway_interface(name)
        __send__(name)
      else
        nodes.__send__(name, *arguments, &block)
      end
    end

    # Keeps respond_to? consistent with method_missing above.
    def respond_to_missing?(name, include_private = false)
      self.class.gateway_interfaces.include?(name.to_s) ||
        nodes.respond_to?(name, include_private) ||
        super
    end
  end
end
|
data/lib/chawan.rb
CHANGED
data/spec/node_spec.rb
CHANGED
data/spec/nodes_spec.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
|
2
|
+
require File.join(File.dirname(__FILE__), 'spec_helper.rb')
|
3
|
+
|
4
|
+
# Extends the spec DSL with a macro for describing Chawan::Nodes gateway
# interfaces.
module Spec
  module Example
    module Subject
      module ExampleGroupMethods
        # Declares three examples for the gateway interface +name+ of the
        # current subject: calling it does not raise, it returns a
        # Chawan::Nodes, and the given block holds for its result (the
        # block is handed to +its+).
        def gateway_interface(name, &block)
          it("should provide gateway interface ##{name}") do
            lambda { subject.__send__(name) }.should_not raise_error
          end

          it("should return an instance of Chawan::Nodes") do
            subject.__send__(name).should be_a_kind_of(Chawan::Nodes)
          end

          its(name, &block)
        end
      end
    end
  end
end
|
23
|
+
|
24
|
+
|
25
|
+
# Specs for Chawan::Nodes: gateway interfaces (noun, verb, compact, grep)
# and Enumerable support.
describe Chawan::Nodes do
  before do
    keys = ["表記", "品詞"]
    node = lambda { |word, category| Chawan::Node.new([word, category], keys) }

    # Fixture sentence: noun, particle, noun, noun, auxiliary verb x2.
    @n1 = node.call("本日", "名詞")
    @p1 = node.call("は", "助詞")
    @n2 = node.call("マジ", "名詞")
    @n3 = node.call("晴天", "名詞")
    @v1 = node.call("なり", "助動詞")
    @v2 = node.call("っす", "助動詞")
    @nodes = Chawan::Nodes.new([@n1, @p1, @n2, @n3, @v1, @v2])

    # Expected results of merging the consecutive same-category nodes.
    @n23 = node.call("マジ晴天", "名詞")
    @v12 = node.call("なりっす", "助動詞")
  end

  subject { @nodes }

  ######################################################################
  ### Gateway Interface

  gateway_interface(:noun) { should == Chawan::Nodes.new([@n1, @n2, @n3]) }
  gateway_interface(:verb) { should == Chawan::Nodes.new([@v1, @v2]) }

  gateway_interface(:compact) { should == Chawan::Nodes.new([@n1, @p1, @n23, @v12]) }
  it "#compact(/名詞/) should compact only nodes matched /名詞/" do
    subject.compact(/名詞/).should == Chawan::Nodes.new([@n1, @p1, @n23, @v1, @v2])
  end

  provide :grep
  describe "#grep" do
    it "should scope nodes by given category" do
      nouns = Chawan::Nodes.new([@n1, @n2, @n3])
      subject.grep(/名詞/).should == nouns
      subject.grep('名詞').should == nouns

      # A Regexp matches part of the category; a String must equal it.
      subject.grep(/助/).should == Chawan::Nodes.new([@p1, @v1, @v2])
      subject.grep('助').should == Chawan::Nodes.new([])
    end
  end

  ######################################################################
  ### Enumerable

  provide :each
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
######################################################################
|
2
|
+
### provide matcher
|
3
|
+
# Matcher: passes when the object defines a method called +expected+,
# whatever its visibility (public, protected or private).
Spec::Matchers.define :provide do |expected|
  match do |obj|
    defined = obj.public_methods + obj.protected_methods + obj.private_methods
    # Method lists hold Strings on Ruby 1.8 but Symbols on 1.9 and later;
    # normalise to strings so the lookup works on both.
    defined.map { |m| m.to_s }.include?(expected.to_s)
  end
end
|
8
|
+
|
9
|
+
module Spec
  module Example
    module Subject
      module ExampleGroupMethods
        # Declares an example asserting that the subject provides the
        # method +name+ (checked with the +provide+ matcher).
        #
        # == Examples
        #
        #   describe User do
        #     subject { User.new }
        #     provide :name
        #
        #     [instead of]
        #
        #     it "should provide #name" do
        #       methods = subject.public_methods + subject.protected_methods + subject.private_methods
        #       methods.should include("name")
        #     end
        #   end
        #
        def provide(name)
          it("should provide ##{name}") { subject.should provide(name) }
        end
      end
    end
  end
end
|
36
|
+
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chawan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- maiha
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-06 00:00:00 +09:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -33,10 +33,13 @@ files:
|
|
33
33
|
- lib/chawan/analyzers/manager.rb
|
34
34
|
- lib/chawan/node.rb
|
35
35
|
- lib/chawan/commands.rb
|
36
|
+
- lib/chawan/nodes.rb
|
36
37
|
- lib/chawan.rb
|
37
38
|
- spec/chasen_spec.rb
|
38
39
|
- spec/api_spec.rb
|
40
|
+
- spec/nodes_spec.rb
|
39
41
|
- spec/mecab_spec.rb
|
42
|
+
- spec/provide_helper.rb
|
40
43
|
- spec/analyzer_spec.rb
|
41
44
|
- spec/fixtures/example1.txt
|
42
45
|
- spec/fixtures/example2.txt
|