chawan 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +45 -6
- data/Rakefile +1 -1
- data/lib/chawan/analyzers/chasen_analyzer.rb +2 -1
- data/lib/chawan/node.rb +13 -1
- data/lib/chawan/nodes.rb +72 -0
- data/lib/chawan.rb +1 -0
- data/spec/node_spec.rb +2 -0
- data/spec/nodes_spec.rb +68 -0
- data/spec/provide_helper.rb +36 -0
- data/spec/spec_helper.rb +1 -0
- metadata +5 -2
data/README
CHANGED
@@ -17,7 +17,13 @@ Methods
|
|
17
17
|
Class
|
18
18
|
=====
|
19
19
|
|
20
|
-
* Chawan::Node
|
20
|
+
* Chawan::Nodes (Chawan.parse returns a Chawan::Nodes)
|
21
|
+
#noun : scope category with noun
|
22
|
+
#verb : scope category with verb
|
23
|
+
#grep : scope category with given pattern
|
24
|
+
#compact : mix the category-consecutive nodes
|
25
|
+
|
26
|
+
* Chawan::Node (Chawan::Nodes has many Chawan::Node(s))
|
21
27
|
#category : part of speech
|
22
28
|
#word : text
|
23
29
|
#attributes : keys and vals hash
|
@@ -26,11 +32,45 @@ Class
|
|
26
32
|
Example
|
27
33
|
=======
|
28
34
|
|
29
|
-
|
30
|
-
|
35
|
+
text = '本日はバリ晴天なりっす'
|
36
|
+
|
37
|
+
# 'parse' returns a Chawan::Nodes
|
38
|
+
Chawan.parse(text)
|
39
|
+
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: 'バリ'>, <名詞: '晴天'>, <助動詞: 'なり'>, <助動詞: 'っす'>]
|
40
|
+
|
41
|
+
# Chawan::Nodes is enumerable
|
42
|
+
Chawan.parse(text).select{|node| node.category == '名詞'}
|
43
|
+
=> [<名詞: '本日'>, <名詞: 'バリ'>, <名詞: '晴天'>]
|
44
|
+
|
45
|
+
# gateway interface: noun
|
46
|
+
Chawan.parse(text).noun
|
47
|
+
=> [<名詞: '本日'>, <名詞: 'バリ'>, <名詞: '晴天'>]
|
48
|
+
|
49
|
+
# gateway interface: verb
|
50
|
+
Chawan.parse(text).verb
|
51
|
+
=> [<助動詞: 'なり'>, <助動詞: 'っす'>]
|
52
|
+
|
53
|
+
# gateway interface: grep
|
54
|
+
Chawan.parse(text).grep(/助/)
|
55
|
+
=> [<助詞: 'は'>, <助動詞: 'なり'>, <助動詞: 'っす'>]
|
56
|
+
|
57
|
+
# gateway interface: compact
|
58
|
+
Chawan.parse(text).compact
|
59
|
+
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: 'バリ晴天'>, <助動詞: 'なりっす'>]
|
60
|
+
Chawan.parse(text).compact(/名詞/)
|
61
|
+
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: 'バリ晴天'>, <助動詞: 'なり'>, <助動詞: 'っす'>]
|
62
|
+
|
63
|
+
# gateway interface is chainable
|
64
|
+
Chawan.parse(text).noun.verb
|
65
|
+
=> []
|
31
66
|
|
32
|
-
|
33
|
-
|
67
|
+
# chainable is fun!
|
68
|
+
Chawan.parse(text).noun
|
69
|
+
=> [<名詞: '本日'>, <名詞: 'バリ'>, <名詞: '晴天'>]
|
70
|
+
Chawan.parse(text).compact.noun
|
71
|
+
=> [<名詞: '本日'>, <名詞: 'バリ晴天'>]
|
72
|
+
Chawan.parse(text).noun.compact
|
73
|
+
=> [<名詞: '本日バリ晴天'>]
|
34
74
|
|
35
75
|
|
36
76
|
Analyzer
|
@@ -64,7 +104,6 @@ Required
|
|
64
104
|
Todo
|
65
105
|
====
|
66
106
|
|
67
|
-
* gateway interface to Chawan#parse such as grep, noun, ...
|
68
107
|
* use open3 rather than backquote for executing unix commands
|
69
108
|
|
70
109
|
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
|
|
6
6
|
EMAIL = "maiha@wota.jp"
|
7
7
|
HOMEPAGE = "http://github.com/maiha/chawan"
|
8
8
|
SUMMARY = "A cup for chasen that provides an easy to use for extracting Japanese"
|
9
|
-
GEM_VERSION = "0.0.2"
|
9
|
+
GEM_VERSION = "0.0.3"
|
10
10
|
|
11
11
|
spec = Gem::Specification.new do |s|
|
12
12
|
s.rubyforge_project = 'asakusarb'
|
data/lib/chawan/analyzers/chasen_analyzer.rb
CHANGED
@@ -10,7 +10,8 @@ module Chawan
|
|
10
10
|
lines = execute(text).split(/\n/)
|
11
11
|
lines.pop == "EOS" or # "EOS"
|
12
12
|
raise CannotAnalyze
|
13
|
-
|
13
|
+
nodes = lines.map{|line| instantiate(line, self.class::Fields)}
|
14
|
+
return Nodes.new(nodes)
|
14
15
|
end
|
15
16
|
|
16
17
|
private
|
data/lib/chawan/node.rb
CHANGED
@@ -1,8 +1,20 @@
|
|
1
1
|
module Chawan
|
2
2
|
class Node
|
3
|
-
|
3
|
+
attr_accessor :vals
|
4
4
|
attr_reader :keys
|
5
5
|
|
6
|
+
module GatewayInterface
|
7
|
+
def noun?
|
8
|
+
category.to_s =~ /名詞/
|
9
|
+
end
|
10
|
+
|
11
|
+
def verb?
|
12
|
+
category.to_s =~ /動詞/
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
include GatewayInterface
|
17
|
+
|
6
18
|
def initialize(vals, keys)
|
7
19
|
@vals = vals
|
8
20
|
@keys = keys
|
data/lib/chawan/nodes.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module Chawan
|
2
|
+
class Nodes
|
3
|
+
attr_reader :nodes
|
4
|
+
|
5
|
+
def initialize(nodes)
|
6
|
+
@nodes = nodes
|
7
|
+
end
|
8
|
+
|
9
|
+
def ==(other)
|
10
|
+
self.class == other.class and map(&:to_s) == other.map(&:to_s)
|
11
|
+
end
|
12
|
+
|
13
|
+
######################################################################
|
14
|
+
### Enumerable
|
15
|
+
|
16
|
+
include Enumerable
|
17
|
+
|
18
|
+
def each(&block)
|
19
|
+
nodes.__send__(:each, &block)
|
20
|
+
end
|
21
|
+
|
22
|
+
def inspect
|
23
|
+
"[%s]" % map(&:inspect).join(', ')
|
24
|
+
end
|
25
|
+
|
26
|
+
######################################################################
|
27
|
+
### GatewayInterface
|
28
|
+
|
29
|
+
def grep(pattern, &block)
|
30
|
+
if block
|
31
|
+
nodes.__send__(regexp, &block)
|
32
|
+
else
|
33
|
+
Nodes.new(nodes.select{|n| pattern === n.category})
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def compact(pattern = //)
|
38
|
+
array = []
|
39
|
+
prev = nil
|
40
|
+
each do |node|
|
41
|
+
if array.last and (pattern === node.category) and array.last.category == node.category
|
42
|
+
array.last.word << node.word.to_s
|
43
|
+
else
|
44
|
+
array << Node.new(node.vals.map(&:dup), node.keys)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
self.class.new(array)
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.gateway_interfaces
|
51
|
+
Chawan::Node::GatewayInterface.instance_methods.map(&:to_s).grep(/\?$/){$`}
|
52
|
+
end
|
53
|
+
|
54
|
+
def self.define_gateway_interface(name)
|
55
|
+
class_eval %{
|
56
|
+
def #{name}
|
57
|
+
Nodes.new(nodes.select{|n| n.__send__("#{name}?")})
|
58
|
+
end
|
59
|
+
}
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
def method_missing(name, *arguments, &block)
|
64
|
+
if self.class.gateway_interfaces.include?(name.to_s)
|
65
|
+
self.class.define_gateway_interface(name)
|
66
|
+
__send__(name)
|
67
|
+
else
|
68
|
+
nodes.__send__(name, *arguments, &block)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/chawan.rb
CHANGED
data/spec/node_spec.rb
CHANGED
data/spec/nodes_spec.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
|
2
|
+
require File.join(File.dirname(__FILE__), 'spec_helper.rb')
|
3
|
+
|
4
|
+
module Spec
|
5
|
+
module Example
|
6
|
+
module Subject
|
7
|
+
module ExampleGroupMethods
|
8
|
+
def gateway_interface(name, &block)
|
9
|
+
it "should provide gateway interface ##{name}" do
|
10
|
+
lambda { subject.__send__(name) }.should_not raise_error
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should return an instance of Chawan::Nodes" do
|
14
|
+
subject.__send__(name).should be_a_kind_of(Chawan::Nodes)
|
15
|
+
end
|
16
|
+
|
17
|
+
its(name, &block)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
describe Chawan::Nodes do
|
26
|
+
before do
|
27
|
+
keys = ["表記","品詞"]
|
28
|
+
@n1 = Chawan::Node.new(["本日","名詞"], keys)
|
29
|
+
@p1 = Chawan::Node.new(["は" ,"助詞"], keys)
|
30
|
+
@n2 = Chawan::Node.new(["マジ","名詞"], keys)
|
31
|
+
@n3 = Chawan::Node.new(["晴天","名詞"], keys)
|
32
|
+
@v1 = Chawan::Node.new(["なり","助動詞"], keys)
|
33
|
+
@v2 = Chawan::Node.new(["っす","助動詞"], keys)
|
34
|
+
@nodes = Chawan::Nodes.new([@n1, @p1, @n2, @n3, @v1, @v2])
|
35
|
+
|
36
|
+
@n23 = Chawan::Node.new(["マジ晴天","名詞"], keys)
|
37
|
+
@v12 = Chawan::Node.new(["なりっす","助動詞"], keys)
|
38
|
+
end
|
39
|
+
|
40
|
+
subject { @nodes }
|
41
|
+
|
42
|
+
######################################################################
|
43
|
+
### Gateway Interface
|
44
|
+
|
45
|
+
gateway_interface(:noun) { should == Chawan::Nodes.new([@n1, @n2, @n3]) }
|
46
|
+
gateway_interface(:verb) { should == Chawan::Nodes.new([@v1, @v2]) }
|
47
|
+
|
48
|
+
gateway_interface(:compact) { should == Chawan::Nodes.new([@n1, @p1, @n23, @v12]) }
|
49
|
+
it "#compact(/名詞/) should compact only nodes matched /名詞/" do
|
50
|
+
subject.compact(/名詞/).should == Chawan::Nodes.new([@n1, @p1, @n23, @v1, @v2])
|
51
|
+
end
|
52
|
+
|
53
|
+
provide :grep
|
54
|
+
describe "#grep" do
|
55
|
+
it "should scope nodes by given category" do
|
56
|
+
subject.grep(/名詞/).should == Chawan::Nodes.new([@n1, @n2, @n3])
|
57
|
+
subject.grep('名詞').should == Chawan::Nodes.new([@n1, @n2, @n3])
|
58
|
+
|
59
|
+
subject.grep(/助/).should == Chawan::Nodes.new([@p1, @v1, @v2])
|
60
|
+
subject.grep('助').should == Chawan::Nodes.new([])
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
######################################################################
|
65
|
+
### Enumerable
|
66
|
+
|
67
|
+
provide :each
|
68
|
+
end
|
data/spec/provide_helper.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
######################################################################
|
2
|
+
### provide matcher
|
3
|
+
Spec::Matchers.define :provide do |expected|
|
4
|
+
match do |obj|
|
5
|
+
(obj.public_methods + obj.protected_methods + obj.private_methods).include?(expected.to_s)
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
module Spec
|
10
|
+
module Example
|
11
|
+
module Subject
|
12
|
+
module ExampleGroupMethods
|
13
|
+
# == Examples
|
14
|
+
#
|
15
|
+
# describe User do
|
16
|
+
# subject { User.new }
|
17
|
+
# provide :name
|
18
|
+
#
|
19
|
+
# [intead of]
|
20
|
+
#
|
21
|
+
# it "should provide #name" do
|
22
|
+
# methods = subject.public_methods + subject.protected_methods + subject.private_methods
|
23
|
+
# methods.should include("name")
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
def provide(name)
|
28
|
+
it "should provide ##{name}" do
|
29
|
+
subject.should provide(name)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chawan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- maiha
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-06 00:00:00 +09:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -33,10 +33,13 @@ files:
|
|
33
33
|
- lib/chawan/analyzers/manager.rb
|
34
34
|
- lib/chawan/node.rb
|
35
35
|
- lib/chawan/commands.rb
|
36
|
+
- lib/chawan/nodes.rb
|
36
37
|
- lib/chawan.rb
|
37
38
|
- spec/chasen_spec.rb
|
38
39
|
- spec/api_spec.rb
|
40
|
+
- spec/nodes_spec.rb
|
39
41
|
- spec/mecab_spec.rb
|
42
|
+
- spec/provide_helper.rb
|
40
43
|
- spec/analyzer_spec.rb
|
41
44
|
- spec/fixtures/example1.txt
|
42
45
|
- spec/fixtures/example2.txt
|