chawan 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +17 -9
- data/Rakefile +1 -1
- data/lib/chawan/analyzers/chasen_analyzer.rb +8 -4
- data/spec/chasen_spec.rb +27 -0
- metadata +3 -2
data/README
CHANGED
@@ -26,6 +26,22 @@ Class
|
|
26
26
|
Example
|
27
27
|
=======
|
28
28
|
|
29
|
+
Chawan.parse('本日は晴天なり')
|
30
|
+
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: '晴天'>, <助動詞: 'なり'>]
|
31
|
+
|
32
|
+
Chawan.parse('本日は晴天なり').select{|node| node.category == '名詞'}.join
|
33
|
+
=> "本日晴天"
|
34
|
+
|
35
|
+
|
36
|
+
Analyzer
|
37
|
+
========
|
38
|
+
|
39
|
+
Parser engine is defined as 'analyzer'.
|
40
|
+
Available analyzers are:
|
41
|
+
|
42
|
+
* mecab : (default)
|
43
|
+
* chasen
|
44
|
+
|
29
45
|
Chawan[:mecab].parse('test')
|
30
46
|
=> [<名詞: 'test'>]
|
31
47
|
|
@@ -35,14 +51,7 @@ Example
|
|
35
51
|
# Chawan.parse('test') # default analyzer is :mecab
|
36
52
|
|
37
53
|
Chawan[:chasen].parse('test')
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
Chawan.parse('本日は晴天なり')
|
42
|
-
=> [<名詞: '本日'>, <助詞: 'は'>, <名詞: '晴天'>, <助動詞: 'なり'>]
|
43
|
-
|
44
|
-
Chawan.parse('本日は晴天なり').select{|node| node.category == '名詞'}.join
|
45
|
-
=> "本日晴天"
|
54
|
+
=> [<記号: 't'>, <記号: 'e'>, <記号: 's'>, <記号: 't'>]
|
46
55
|
|
47
56
|
|
48
57
|
Required
|
@@ -55,7 +64,6 @@ Required
|
|
55
64
|
Todo
|
56
65
|
====
|
57
66
|
|
58
|
-
* implement ChasenAnalyzer
|
59
67
|
* gateway interface to Chawan#parse such as grep, noun, ...
|
60
68
|
* use open3 rather than backquote for executing unix commands
|
61
69
|
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
|
|
6
6
|
EMAIL = "maiha@wota.jp"
|
7
7
|
HOMEPAGE = "http://github.com/maiha/chawan"
|
8
8
|
SUMMARY = "A cup for chasen that provides an easy to use for extracting Japanese"
|
9
|
-
GEM_VERSION = "0.0.1"
|
9
|
+
GEM_VERSION = "0.0.2"
|
10
10
|
|
11
11
|
spec = Gem::Specification.new do |s|
|
12
12
|
s.rubyforge_project = 'asakusarb'
|
data/lib/chawan/analyzers/chasen_analyzer.rb
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
module Chawan
|
3
3
|
module Analyzers
|
4
4
|
class ChasenAnalyzer < AbstractAnalyzer
|
5
|
-
|
5
|
+
# "%m\t%y\t%M\t%U(%P-)\t%\t%\n" # default format
|
6
|
+
Format = "%m\t%P:\t%y\t\n"
|
7
|
+
Fields = %w{ 見出し 品詞 読み }
|
6
8
|
|
7
9
|
def parse(text)
|
8
10
|
lines = execute(text).split(/\n/)
|
@@ -14,12 +16,14 @@ module Chawan
|
|
14
16
|
private
|
15
17
|
def execute(text)
|
16
18
|
require 'chasen'
|
17
|
-
|
18
|
-
Chasen.getopt('-i', 'w')
|
19
|
+
Chasen.getopt('-i', 'w', '-F', self.class::Format)
|
19
20
|
Chasen.sparse(text)
|
20
21
|
end
|
21
22
|
|
22
|
-
def instantiate(line,
|
23
|
+
def instantiate(line, keys)
|
24
|
+
vals = line.split(/,|\s/)
|
25
|
+
vals[1] = vals[1].split(/:/).first # '名詞-副詞可能' -> '名詞'
|
26
|
+
Node.new(vals, keys)
|
23
27
|
end
|
24
28
|
end
|
25
29
|
end
|
data/spec/chasen_spec.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
require File.join(File.dirname(__FILE__), 'spec_helper.rb')
|
3
|
+
|
4
|
+
describe Chawan::Analyzers::ChasenAnalyzer do
|
5
|
+
before do
|
6
|
+
@a = Chawan::Analyzers::ChasenAnalyzer.new
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should provide #parse" do
|
10
|
+
@a.should respond_to(:parse)
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#parse" do
|
14
|
+
it "should return an Array of Node" do
|
15
|
+
text = data("example1.txt")
|
16
|
+
@a.parse(text).each do |node|
|
17
|
+
node.should be_kind_of(Chawan::Node)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should work as expected" do
|
22
|
+
text = data("example1.txt")
|
23
|
+
@a.parse(text).map(&:inspect).join.should ==
|
24
|
+
"<名詞: '本日'><助詞: 'は'><名詞: '晴天'><助動詞: 'なり'><記号: '。'>"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chawan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- maiha
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-05 00:00:00 +09:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -34,6 +34,7 @@ files:
|
|
34
34
|
- lib/chawan/node.rb
|
35
35
|
- lib/chawan/commands.rb
|
36
36
|
- lib/chawan.rb
|
37
|
+
- spec/chasen_spec.rb
|
37
38
|
- spec/api_spec.rb
|
38
39
|
- spec/mecab_spec.rb
|
39
40
|
- spec/analyzer_spec.rb
|