chawan 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +15 -13
- data/Rakefile +1 -1
- data/lib/chawan/nodes.rb +4 -3
- data/spec/nodes_spec.rb +19 -18
- metadata +1 -1
data/README
CHANGED
@@ -32,33 +32,35 @@ Class
|
|
32
32
|
Example
|
33
33
|
=======
|
34
34
|
|
35
|
-
text = '
|
35
|
+
text = '登録された利用者'
|
36
36
|
|
37
37
|
# 'parse' returns a Chawan::Nodes
|
38
38
|
Chawan.parse(text)
|
39
|
-
=> [<名詞: '
|
39
|
+
=> [<名詞: '登録'>, <動詞: 'さ'>, <動詞: 'れ'>, <助動詞: 'た'>, <名詞: '利用'>, <名詞: '者'>]
|
40
40
|
|
41
41
|
# Chawan::Nodes is enumerable
|
42
42
|
Chawan.parse(text).select{|node| node.category == '名詞'}
|
43
|
-
=> [<名詞: '
|
43
|
+
=> [<名詞: '登録'>, <名詞: '利用'>, <名詞: '者'>]
|
44
44
|
|
45
45
|
# gateway interface: noun
|
46
46
|
Chawan.parse(text).noun
|
47
|
-
=> [<名詞: '
|
47
|
+
=> [<名詞: '登録'>, <名詞: '利用'>, <名詞: '者'>]
|
48
48
|
|
49
49
|
# gateway interface: verb
|
50
50
|
Chawan.parse(text).verb
|
51
|
-
=> [
|
51
|
+
=> [<動詞: 'さ'>, <動詞: 'れ'>, <助動詞: 'た'>]
|
52
52
|
|
53
53
|
# gateway interface: grep
|
54
|
-
Chawan.parse(text).grep(
|
55
|
-
=> [
|
54
|
+
Chawan.parse(text).grep(/動詞/)
|
55
|
+
=> [<動詞: 'さ'>, <動詞: 'れ'>, <助動詞: 'た'>]
|
56
|
+
Chawan.parse(text).grep('動詞')
|
57
|
+
=> [<動詞: 'さ'>, <動詞: 'れ'>]
|
56
58
|
|
57
59
|
# gateway interface: compact
|
58
60
|
Chawan.parse(text).compact
|
59
|
-
=> [<名詞: '
|
60
|
-
Chawan.parse(text).compact(
|
61
|
-
=> [<名詞: '
|
61
|
+
=> [<名詞: '登録'>, <動詞: 'され'>, <助動詞: 'た'>, <名詞: '利用者'>]
|
62
|
+
Chawan.parse(text).compact(/動詞/)
|
63
|
+
=> [<名詞: '登録'>, <動詞: 'された'>, <名詞: '利用'>, <名詞: '者'>]
|
62
64
|
|
63
65
|
# gateway interface is chainable
|
64
66
|
Chawan.parse(text).noun.verb
|
@@ -66,11 +68,11 @@ Example
|
|
66
68
|
|
67
69
|
# chainable is fun!
|
68
70
|
Chawan.parse(text).noun
|
69
|
-
=> [<名詞: '
|
71
|
+
=> [<名詞: '登録'>, <名詞: '利用'>, <名詞: '者'>]
|
70
72
|
Chawan.parse(text).compact.noun
|
71
|
-
=> [<名詞: '
|
73
|
+
=> [<名詞: '登録'>, <名詞: '利用者'>]
|
72
74
|
Chawan.parse(text).noun.compact
|
73
|
-
=> [<名詞: '
|
75
|
+
=> [<名詞: '登録利用者'>]
|
74
76
|
|
75
77
|
|
76
78
|
Analyzer
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
|
|
6
6
|
EMAIL = "maiha@wota.jp"
|
7
7
|
HOMEPAGE = "http://github.com/maiha/chawan"
|
8
8
|
SUMMARY = "A cup for chasen that provides an easy to use for extracting Japanese"
|
9
|
-
GEM_VERSION = "0.0.
|
9
|
+
GEM_VERSION = "0.0.4"
|
10
10
|
|
11
11
|
spec = Gem::Specification.new do |s|
|
12
12
|
s.rubyforge_project = 'asakusarb'
|
data/lib/chawan/nodes.rb
CHANGED
@@ -34,11 +34,12 @@ module Chawan
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
def compact(pattern =
|
37
|
+
def compact(pattern = nil)
|
38
38
|
array = []
|
39
|
-
prev = nil
|
40
39
|
each do |node|
|
41
|
-
if array.last and (pattern
|
40
|
+
if array.last and (pattern ?
|
41
|
+
(pattern === node.category && pattern === array.last.category) :
|
42
|
+
(array.last.category == node.category))
|
42
43
|
array.last.word << node.word.to_s
|
43
44
|
else
|
44
45
|
array << Node.new(node.vals.map(&:dup), node.keys)
|
data/spec/nodes_spec.rb
CHANGED
@@ -25,16 +25,17 @@ end
|
|
25
25
|
describe Chawan::Nodes do
|
26
26
|
before do
|
27
27
|
keys = ["表記","品詞"]
|
28
|
-
@n1 = Chawan::Node.new(["
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@
|
32
|
-
@
|
33
|
-
@
|
34
|
-
@nodes = Chawan::Nodes.new([@n1, @
|
28
|
+
@n1 = Chawan::Node.new(["登録","名詞" ], keys)
|
29
|
+
@v1 = Chawan::Node.new(["さ" ,"動詞" ], keys)
|
30
|
+
@v2 = Chawan::Node.new(["れ" ,"動詞" ], keys)
|
31
|
+
@p1 = Chawan::Node.new(["た" ,"助動詞"], keys)
|
32
|
+
@n2 = Chawan::Node.new(["利用","名詞" ], keys)
|
33
|
+
@n3 = Chawan::Node.new(["者" ,"名詞" ], keys)
|
34
|
+
@nodes = Chawan::Nodes.new([@n1, @v1, @v2, @p1, @n2, @n3])
|
35
35
|
|
36
|
-
@n23
|
37
|
-
@v12
|
36
|
+
@n23 = Chawan::Node.new(["利用者","名詞"], keys)
|
37
|
+
@v12 = Chawan::Node.new(["され","動詞"], keys)
|
38
|
+
@v12p1 = Chawan::Node.new(["された","動詞"], keys)
|
38
39
|
end
|
39
40
|
|
40
41
|
subject { @nodes }
|
@@ -43,21 +44,21 @@ describe Chawan::Nodes do
|
|
43
44
|
### Gateway Interface
|
44
45
|
|
45
46
|
gateway_interface(:noun) { should == Chawan::Nodes.new([@n1, @n2, @n3]) }
|
46
|
-
gateway_interface(:verb) { should == Chawan::Nodes.new([@v1, @v2]) }
|
47
|
+
gateway_interface(:verb) { should == Chawan::Nodes.new([@v1, @v2, @p1]) }
|
47
48
|
|
48
|
-
gateway_interface(:compact) { should == Chawan::Nodes.new([@n1, @
|
49
|
-
it "#compact(
|
50
|
-
subject.compact(
|
49
|
+
gateway_interface(:compact) { should == Chawan::Nodes.new([@n1, @v12, @p1, @n23]) }
|
50
|
+
it "#compact(/動詞/) should compact only nodes matched /動詞/" do
|
51
|
+
subject.compact(/動詞/).should == Chawan::Nodes.new([@n1, @v12p1, @n2, @n3])
|
52
|
+
end
|
53
|
+
it "#compact('動詞') should compact only nodes categoried '動詞'" do
|
54
|
+
subject.compact('動詞').should == Chawan::Nodes.new([@n1, @v12, @p1, @n2, @n3])
|
51
55
|
end
|
52
56
|
|
53
57
|
provide :grep
|
54
58
|
describe "#grep" do
|
55
59
|
it "should scope nodes by given category" do
|
56
|
-
subject.grep(
|
57
|
-
subject.grep('
|
58
|
-
|
59
|
-
subject.grep(/助/).should == Chawan::Nodes.new([@p1, @v1, @v2])
|
60
|
-
subject.grep('助').should == Chawan::Nodes.new([])
|
60
|
+
subject.grep(/動詞/).should == Chawan::Nodes.new([@v1, @v2, @p1])
|
61
|
+
subject.grep('動詞').should == Chawan::Nodes.new([@v1, @v2])
|
61
62
|
end
|
62
63
|
end
|
63
64
|
|