chawan 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -26,6 +26,22 @@ Class
26
26
  Example
27
27
  =======
28
28
 
29
+ Chawan.parse('本日は晴天なり')
30
+ => [<名詞: '本日'>, <助詞: 'は'>, <名詞: '晴天'>, <助動詞: 'なり'>]
31
+
32
+ Chawan.parse('本日は晴天なり').select{|node| node.category == '名詞'}.join
33
+ => "本日晴天"
34
+
35
+
36
+ Analyzer
37
+ ========
38
+
39
+ Parser engine is defined as 'analyzer'.
40
+ Available analyzers are:
41
+
42
+ * mecab : (default)
43
+ * chasen
44
+
29
45
  Chawan[:mecab].parse('test')
30
46
  => [<名詞: 'test'>]
31
47
 
@@ -35,14 +51,7 @@ Example
35
51
  # Chawan.parse('test') # default analyzer is :mecab
36
52
 
37
53
  Chawan[:chasen].parse('test')
38
- # ChasenAnalyzer is not implemented yet
39
-
40
-
41
- Chawan.parse('本日は晴天なり')
42
- => [<名詞: '本日'>, <助詞: 'は'>, <名詞: '晴天'>, <助動詞: 'なり'>]
43
-
44
- Chawan.parse('本日は晴天なり').select{|node| node.category == '名詞'}.join
45
- => "本日晴天"
54
+ => [<記号: 't'>, <記号: 'e'>, <記号: 's'>, <記号: 't'>]
46
55
 
47
56
 
48
57
  Required
@@ -55,7 +64,6 @@ Required
55
64
  Todo
56
65
  ====
57
66
 
58
- * implement ChasenAnalyzer
59
67
  * gateway interface to Chawan#parse such as grep, noun, ...
60
68
  * use open3 rather than backquote for executing unix commands
61
69
 
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
6
6
  EMAIL = "maiha@wota.jp"
7
7
  HOMEPAGE = "http://github.com/maiha/chawan"
8
8
  SUMMARY = "A cup for chasen that provides an easy to use for extracting Japanese"
9
- GEM_VERSION = "0.0.1"
9
+ GEM_VERSION = "0.0.2"
10
10
 
11
11
  spec = Gem::Specification.new do |s|
12
12
  s.rubyforge_project = 'asakusarb'
@@ -2,7 +2,9 @@
2
2
  module Chawan
3
3
  module Analyzers
4
4
  class ChasenAnalyzer < AbstractAnalyzer
5
- Fields = []
5
+ # "%m\t%y\t%M\t%U(%P-)\t%\t%\n" # default format
6
+ Format = "%m\t%P:\t%y\t\n"
7
+ Fields = %w{ 見出し 品詞 読み }
6
8
 
7
9
  def parse(text)
8
10
  lines = execute(text).split(/\n/)
@@ -14,12 +16,14 @@ module Chawan
14
16
  private
15
17
  def execute(text)
16
18
  require 'chasen'
17
- # format = %w[%m %y %M %Y %h %P- %t %T- %f %F- %?U/unknown/known/].join("\t") + "\t\n"
18
- Chasen.getopt('-i', 'w')
19
+ Chasen.getopt('-i', 'w', '-F', self.class::Format)
19
20
  Chasen.sparse(text)
20
21
  end
21
22
 
22
- def instantiate(line, fields)
23
+ def instantiate(line, keys)
24
+ vals = line.split(/,|\s/)
25
+ vals[1] = vals[1].split(/:/).first # '名詞-副詞可能' -> '名詞'
26
+ Node.new(vals, keys)
23
27
  end
24
28
  end
25
29
  end
data/spec/chasen_spec.rb ADDED
@@ -0,0 +1,27 @@
1
+
2
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
3
+
4
+ describe Chawan::Analyzers::ChasenAnalyzer do
5
+ before do
6
+ @a = Chawan::Analyzers::ChasenAnalyzer.new
7
+ end
8
+
9
+ it "should provide #parse" do
10
+ @a.should respond_to(:parse)
11
+ end
12
+
13
+ describe "#parse" do
14
+ it "should return an Array of Node" do
15
+ text = data("example1.txt")
16
+ @a.parse(text).each do |node|
17
+ node.should be_kind_of(Chawan::Node)
18
+ end
19
+ end
20
+
21
+ it "should work as expected" do
22
+ text = data("example1.txt")
23
+ @a.parse(text).map(&:inspect).join.should ==
24
+ "<名詞: '本日'><助詞: 'は'><名詞: '晴天'><助動詞: 'なり'><記号: '。'>"
25
+ end
26
+ end
27
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chawan
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - maiha
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-03 00:00:00 +09:00
12
+ date: 2010-01-05 00:00:00 +09:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -34,6 +34,7 @@ files:
34
34
  - lib/chawan/node.rb
35
35
  - lib/chawan/commands.rb
36
36
  - lib/chawan.rb
37
+ - spec/chasen_spec.rb
37
38
  - spec/api_spec.rb
38
39
  - spec/mecab_spec.rb
39
40
  - spec/analyzer_spec.rb