chawan 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +17 -9
- data/Rakefile +1 -1
- data/lib/chawan/analyzers/chasen_analyzer.rb +8 -4
- data/spec/chasen_spec.rb +27 -0
- metadata +3 -2
data/README
CHANGED
@@ -26,6 +26,22 @@ Class
|
|
26
26
|
Example
|
27
27
|
=======
|
28
28
|
|
29
|
+
Chawan.parse('本日は晴天なり')
|
30
|
+
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: '晴天'>, <助動詞: 'なり'>]
|
31
|
+
|
32
|
+
Chawan.parse('本日は晴天なり').select{|node| node.category == '名詞'}.join
|
33
|
+
=> "本日晴天"
|
34
|
+
|
35
|
+
|
36
|
+
Analyzer
|
37
|
+
========
|
38
|
+
|
39
|
+
Parser engine is defined as 'analyzer'.
|
40
|
+
Available analyzers are:
|
41
|
+
|
42
|
+
* mecab : (default)
|
43
|
+
* chasen
|
44
|
+
|
29
45
|
Chawan[:mecab].parse('test')
|
30
46
|
=> [<名詞: 'test'>]
|
31
47
|
|
@@ -35,14 +51,7 @@ Example
|
|
35
51
|
# Chawan.parse('test') # default analyzer is :mecab
|
36
52
|
|
37
53
|
Chawan[:chasen].parse('test')
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
Chawan.parse('本日は晴天なり')
|
42
|
-
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: '晴天'>, <助動詞: 'なり'>]
|
43
|
-
|
44
|
-
Chawan.parse('本日は晴天なり').select{|node| node.category == '名詞'}.join
|
45
|
-
=> "本日晴天"
|
54
|
+
=> [<記号: 't'>, <記号: 'e'>, <記号: 's'>, <記号: 't'>]
|
46
55
|
|
47
56
|
|
48
57
|
Required
|
@@ -55,7 +64,6 @@ Required
|
|
55
64
|
Todo
|
56
65
|
====
|
57
66
|
|
58
|
-
* implement ChasenAnalyzer
|
59
67
|
* gateway interface to Chawan#parse such as grep, noun, ...
|
60
68
|
* use open3 rather than backquote for executing unix commands
|
61
69
|
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
|
|
6
6
|
EMAIL = "maiha@wota.jp"
|
7
7
|
HOMEPAGE = "http://github.com/maiha/chawan"
|
8
8
|
SUMMARY = "A cup for chasen that provides an easy to use for extracting Japanese"
|
9
|
-
GEM_VERSION = "0.0.1"
|
9
|
+
GEM_VERSION = "0.0.2"
|
10
10
|
|
11
11
|
spec = Gem::Specification.new do |s|
|
12
12
|
s.rubyforge_project = 'asakusarb'
|
@@ -2,7 +2,9 @@
|
|
2
2
|
module Chawan
|
3
3
|
module Analyzers
|
4
4
|
class ChasenAnalyzer < AbstractAnalyzer
|
5
|
-
|
5
|
+
# "%m\t%y\t%M\t%U(%P-)\t%\t%\n" # default format
|
6
|
+
Format = "%m\t%P:\t%y\t\n"
|
7
|
+
Fields = %w{ 見出し 品詞 読み }
|
6
8
|
|
7
9
|
def parse(text)
|
8
10
|
lines = execute(text).split(/\n/)
|
@@ -14,12 +16,14 @@ module Chawan
|
|
14
16
|
private
|
15
17
|
def execute(text)
|
16
18
|
require 'chasen'
|
17
|
-
|
18
|
-
Chasen.getopt('-i', 'w')
|
19
|
+
Chasen.getopt('-i', 'w', '-F', self.class::Format)
|
19
20
|
Chasen.sparse(text)
|
20
21
|
end
|
21
22
|
|
22
|
-
def instantiate(line,
|
23
|
+
def instantiate(line, keys)
|
24
|
+
vals = line.split(/,|\s/)
|
25
|
+
vals[1] = vals[1].split(/:/).first # '名詞-副詞可能' -> '名詞'
|
26
|
+
Node.new(vals, keys)
|
23
27
|
end
|
24
28
|
end
|
25
29
|
end
|
data/spec/chasen_spec.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
require File.join(File.dirname(__FILE__), 'spec_helper.rb')
|
3
|
+
|
4
|
+
describe Chawan::Analyzers::ChasenAnalyzer do
|
5
|
+
before do
|
6
|
+
@a = Chawan::Analyzers::ChasenAnalyzer.new
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should provide #parse" do
|
10
|
+
@a.should respond_to(:parse)
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#parse" do
|
14
|
+
it "should return an Array of Node" do
|
15
|
+
text = data("example1.txt")
|
16
|
+
@a.parse(text).each do |node|
|
17
|
+
node.should be_kind_of(Chawan::Node)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should work as expected" do
|
22
|
+
text = data("example1.txt")
|
23
|
+
@a.parse(text).map(&:inspect).join.should ==
|
24
|
+
"<名詞: '本日'><助詞: 'は'><名詞: '晴天'><助動詞: 'なり'><記号: '。'>"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chawan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- maiha
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-05 00:00:00 +09:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -34,6 +34,7 @@ files:
|
|
34
34
|
- lib/chawan/node.rb
|
35
35
|
- lib/chawan/commands.rb
|
36
36
|
- lib/chawan.rb
|
37
|
+
- spec/chasen_spec.rb
|
37
38
|
- spec/api_spec.rb
|
38
39
|
- spec/mecab_spec.rb
|
39
40
|
- spec/analyzer_spec.rb
|