chawan 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -26,6 +26,22 @@ Class
26
26
  Example
27
27
  =======
28
28
 
29
+ Chawan.parse('本日は晴天なり')
30
+ => [<名詞: '本日'>, <助詞: 'は'>, <名詞: '晴天'>, <助動詞: 'なり'>]
31
+
32
+ Chawan.parse('本日は晴天なり').select{|node| node.category == '名詞'}.join
33
+ => "本日晴天"
34
+
35
+
36
+ Analyzer
37
+ ========
38
+
39
+ The parser engine is called an 'analyzer'.
40
+ Available analyzers are:
41
+
42
+ * mecab : (default)
43
+ * chasen
44
+
29
45
  Chawan[:mecab].parse('test')
30
46
  => [<名詞: 'test'>]
31
47
 
@@ -35,14 +51,7 @@ Example
35
51
  # Chawan.parse('test') # default analyzer is :mecab
36
52
 
37
53
  Chawan[:chasen].parse('test')
38
- # ChasenAnalyzer is not implemented yet
39
-
40
-
41
- Chawan.parse('本日は晴天なり')
42
- => [<名詞: '本日'>, <助詞: 'は'>, <名詞: '晴天'>, <助動詞: 'なり'>]
43
-
44
- Chawan.parse('本日は晴天なり').select{|node| node.category == '名詞'}.join
45
- => "本日晴天"
54
+ => [<記号: 't'>, <記号: 'e'>, <記号: 's'>, <記号: 't'>]
46
55
 
47
56
 
48
57
  Required
@@ -55,7 +64,6 @@ Required
55
64
  Todo
56
65
  ====
57
66
 
58
- * implement ChasenAnalyzer
59
67
  * gateway interface to Chawan#parse such as grep, noun, ...
60
68
  * use open3 rather than backquote for executing unix commands
61
69
 
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
6
6
  EMAIL = "maiha@wota.jp"
7
7
  HOMEPAGE = "http://github.com/maiha/chawan"
8
8
  SUMMARY = "A cup for chasen that provides an easy to use for extracting Japanese"
9
- GEM_VERSION = "0.0.1"
9
+ GEM_VERSION = "0.0.2"
10
10
 
11
11
  spec = Gem::Specification.new do |s|
12
12
  s.rubyforge_project = 'asakusarb'
@@ -2,7 +2,9 @@
2
2
  module Chawan
3
3
  module Analyzers
4
4
  class ChasenAnalyzer < AbstractAnalyzer
5
- Fields = []
5
+ # "%m\t%y\t%M\t%U(%P-)\t%\t%\n" # default format
6
+ Format = "%m\t%P:\t%y\t\n"
7
+ Fields = %w{ 見出し 品詞 読み }
6
8
 
7
9
  def parse(text)
8
10
  lines = execute(text).split(/\n/)
@@ -14,12 +16,14 @@ module Chawan
14
16
  private
15
17
  def execute(text)
16
18
  require 'chasen'
17
- # format = %w[%m %y %M %Y %h %P- %t %T- %f %F- %?U/unknown/known/].join("\t") + "\t\n"
18
- Chasen.getopt('-i', 'w')
19
+ Chasen.getopt('-i', 'w', '-F', self.class::Format)
19
20
  Chasen.sparse(text)
20
21
  end
21
22
 
22
- def instantiate(line, fields)
23
+ def instantiate(line, keys)
24
+ vals = line.split(/,|\s/)
25
+ vals[1] = vals[1].split(/:/).first # '名詞-副詞可能' -> '名詞'
26
+ Node.new(vals, keys)
23
27
  end
24
28
  end
25
29
  end
@@ -0,0 +1,27 @@
1
+
2
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
3
+
4
+ describe Chawan::Analyzers::ChasenAnalyzer do
5
+ before do
6
+ @a = Chawan::Analyzers::ChasenAnalyzer.new
7
+ end
8
+
9
+ it "should provide #parse" do
10
+ @a.should respond_to(:parse)
11
+ end
12
+
13
+ describe "#parse" do
14
+ it "should return an Array of Node" do
15
+ text = data("example1.txt")
16
+ @a.parse(text).each do |node|
17
+ node.should be_kind_of(Chawan::Node)
18
+ end
19
+ end
20
+
21
+ it "should work as expected" do
22
+ text = data("example1.txt")
23
+ @a.parse(text).map(&:inspect).join.should ==
24
+ "<名詞: '本日'><助詞: 'は'><名詞: '晴天'><助動詞: 'なり'><記号: '。'>"
25
+ end
26
+ end
27
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chawan
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - maiha
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-03 00:00:00 +09:00
12
+ date: 2010-01-05 00:00:00 +09:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -34,6 +34,7 @@ files:
34
34
  - lib/chawan/node.rb
35
35
  - lib/chawan/commands.rb
36
36
  - lib/chawan.rb
37
+ - spec/chasen_spec.rb
37
38
  - spec/api_spec.rb
38
39
  - spec/mecab_spec.rb
39
40
  - spec/analyzer_spec.rb