natto 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/natto.rb +84 -25
- data/test/test_natto.rb +30 -4
- metadata +4 -5
data/lib/natto.rb
CHANGED
@@ -1,39 +1,98 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
1
2
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
2
|
-
require 'ffi'
|
3
|
-
require 'rbconfig'
|
4
3
|
|
5
4
|
module Natto
|
6
|
-
|
5
|
+
require 'ffi'
|
7
6
|
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
class MeCab
|
8
|
+
# Option keys that build_options_str turns into "--key=value" arguments.
# Keys that are not listed here are ignored, and the order of this list
# is the order in which the arguments are emitted.
# Frozen so the shared list cannot be modified at runtime.
SUPPORTED_OPTS = [
  :rcfile, :dicdir, :userdic, :output_format_type, :lattice_level,
  :node_format, :unk_format, :bos_format, :eos_format, :eon_format,
  :unk_feature, :nbest, :theta, :cost_factor
  # currently commented out: :all_morphs, :partial, :allocate_sentence
].freeze
|
12
|
+
attr_reader :ptr
|
13
|
+
|
14
|
+
# Creates a MeCab instance backed by the native pointer returned from
# Natto::Binding.mecab_new2.
#
# options - Hash of option keys to values (defaults to {}); it is turned
#           into the argument string by build_options_str.
#
# Raises MeCabError when mecab_new2 hands back a null pointer.
def initialize(options={})
  opt_str = self.class.build_options_str(options)
  @ptr = Natto::Binding.mecab_new2(opt_str)
  raise MeCabError.new("MeCab initialization error with '#{opt_str}'") if @ptr.null?
  #@dict = Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
  # Arrange for mecab_destroy to be called on the pointer once this object
  # is garbage collected. The proc comes from a class method and only
  # receives the pointer - presumably so that it does not hold on to self.
  ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
end
|
11
23
|
|
12
|
-
|
13
|
-
|
24
|
+
# Hands s to the native mecab_sparse_tostr call together with this
# instance's pointer and returns whatever that call produced.
#
# s - the text passed to mecab_sparse_tostr.
#
# Raises MeCabError, with the text from mecab_strerror as its message,
# when the native call returns nil or false.
def parse(s)
  parsed = Natto::Binding.mecab_sparse_tostr(@ptr, s)
  return parsed if parsed
  raise(MeCabError.new(Natto::Binding.mecab_strerror(@ptr)))
end
|
28
|
+
|
29
|
+
# Builds the callback that releases a native MeCab pointer.
#
# ptr - the pointer that will be handed to Natto::Binding.mecab_destroy.
#
# Returns a Proc. It takes no declared parameters, so any arguments it is
# called with are ignored, and each call invokes mecab_destroy on ptr.
def self.create_free_proc(ptr)
  Proc.new { Natto::Binding.mecab_destroy(ptr) }
end
|
14
35
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
36
|
+
# Converts an options hash into a single argument string.
#
# options - Hash keyed by symbols (defaults to {}). Only keys present in
#           SUPPORTED_OPTS are used; every other key is ignored.
#
# Each used key becomes "--key=value", with underscores in the key
# replaced by hyphens. Arguments appear in SUPPORTED_OPTS order, not in
# the hash's own order, and are joined with single spaces.
#
# Returns the argument String, which is empty when no supported key is
# present.
def self.build_options_str(options={})
  requested = SUPPORTED_OPTS.select { |name| options.key?(name) }
  arguments = requested.map do |name|
    "--#{name.to_s.tr('_', '-')}=#{options[name]}"
  end
  arguments.join(" ")
end
|
22
46
|
end
|
47
|
+
|
48
|
+
# Error type raised by MeCab#initialize and MeCab#parse when the native
# MeCab call does not succeed.
class MeCabError < RuntimeError
end
|
49
|
+
|
50
|
+
# FFI struct with eight members, declared in the order given below.
# NOTE(review): presumably mirrors the native structure behind the pointer
# returned by mecab_dictionary_info - confirm the member order and types
# against the MeCab header before relying on it.
class DictionaryInfo < FFI::Struct
  layout(
    :filename, :string,
    :charset,  :string,
    :size,     :uint,
    :type,     :int,
    :lsize,    :uint,
    :rsize,    :uint,
    :version,  :ushort,
    :next,     :pointer
  )
end
|
23
60
|
|
24
|
-
|
61
|
+
module Binding
|
62
|
+
require 'rbconfig'
|
63
|
+
extend FFI::Library
|
25
64
|
|
26
|
-
|
27
|
-
attach_function :mecab_version, [], :string
|
28
|
-
attach_function :mecab_sparse_tostr, [:pointer, :string], :string
|
29
|
-
attach_function :mecab_strerror, [:pointer],:string
|
30
|
-
attach_function :mecab_destroy, [:pointer], :void
|
65
|
+
MECAB_PATH = 'MECAB_PATH'
|
31
66
|
|
32
|
-
|
33
|
-
|
34
|
-
|
67
|
+
# Hook that Ruby runs when this module is included somewhere.
#
# base - the class or module doing the including; it is extended with
#        ClassMethods, so those methods become callable on base itself.
def self.included(base)
  base.extend ClassMethods
end
|
70
|
+
|
71
|
+
# Picks the default MeCab library name from RbConfig's 'host_os' value.
#
# Returns 'cygmecab-1' when host_os matches /cygwin/i and 'mecab' for
# every other host that is not matched by the Windows pattern.
#
# Raises LoadError when host_os matches /mswin|mingw/i: no default is
# offered there and the message asks for MECAB_PATH to be set.
def self.find_library
  case RbConfig::CONFIG['host_os']
  when /mswin|mingw/i
    raise LoadError, "Please set #{MECAB_PATH} to full path to libmecab.dll"
  when /cygwin/i
    'cygmecab-1'
  else
    'mecab'
  end
end
|
36
|
-
end
|
37
|
-
end
|
38
82
|
|
83
|
+
# Load the MeCab shared library. The value of the MECAB_PATH environment
# variable is used when it is set; otherwise the name chosen by
# find_library is used.
ffi_lib(ENV[MECAB_PATH] || find_library)

# Native functions attached to this module, one row per function:
# [name, argument types, return type].
[
  [:mecab_version,         [],                   :string],
  [:mecab_new2,            [:string],            :pointer],
  [:mecab_destroy,         [:pointer],           :void],
  [:mecab_sparse_tostr,    [:pointer, :string],  :string],
  [:mecab_strerror,        [:pointer],           :string],
  [:mecab_dictionary_info, [:pointer],           :pointer]
].each do |function_name, argument_types, return_type|
  attach_function function_name, argument_types, return_type
end
|
91
|
+
|
92
|
+
# Methods that the included hook adds to any class including this module.
module ClassMethods
  # Returns the result of Natto::Binding.mecab_version unchanged.
  def mecab_version; Natto::Binding.mecab_version; end
end
|
97
|
+
end
|
98
|
+
end
|
data/test/test_natto.rb
CHANGED
@@ -1,20 +1,46 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
1
2
|
$:.unshift('lib')
|
2
|
-
|
3
3
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
4
4
|
require 'test/unit'
|
5
5
|
require 'natto'
|
6
6
|
|
7
7
|
class TestNatto < Test::Unit::TestCase
|
8
8
|
def setup
|
9
|
+
@klass = Class.new do
|
10
|
+
include Natto::Binding
|
11
|
+
end
|
9
12
|
end
|
10
13
|
|
11
14
|
# Intentionally empty: no per-test cleanup is performed.
def teardown; end
|
13
16
|
|
14
17
|
# Checks that including Natto::Binding (done in setup) exposes
# mecab_version on the including class, and that the reported version
# meets the minimum of 0.98. A minimum is asserted rather than one exact
# version string so the test also passes against newer MeCab releases.
def test_classmethods_include
  minimum = Gem::Version.new('0.98')
  actual = Gem::Version.new(@klass.mecab_version)
  assert(actual >= minimum, "expected MeCab #{minimum} or greater, got #{actual}")
end
|
20
|
+
|
21
|
+
# Exercises Natto::MeCab.build_options_str with no argument, an
# unsupported key, single supported keys, and several keys at once.
def test_build_options_str
  # Called without arguments, the result is an empty string.
  assert_equal('', Natto::MeCab.build_options_str)

  # Each row is [options, expected argument string]. The last row passes
  # its keys in a different order from the one expected in the output.
  [
    [{:unknown=>"ignore"}, ''],
    [{:dicdir=>"a"}, '--dicdir=a'],
    [{:userdic=>"b"}, '--userdic=b'],
    [{:output_format_type=>"c"}, '--output-format-type=c'],
    [{:output_format_type=>"c", :userdic=>"b", :dicdir=>"a"},
     '--dicdir=a --userdic=b --output-format-type=c']
  ].each do |options, expected|
    assert_equal(expected, Natto::MeCab.build_options_str(options))
  end
end
|
40
|
+
|
41
|
+
# Constructing a MeCab with the output format type 'UNDEFINED' is
# expected to raise Natto::MeCabError.
def test_initialize
  assert_raise(Natto::MeCabError) { Natto::MeCab.new(:output_format_type=>'UNDEFINED') }
end
|
20
46
|
end
|
metadata
CHANGED
@@ -5,16 +5,16 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 1
|
9
|
-
version: 0.0.1
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Brooke M. Fujita
|
13
|
-
autorequire:
|
13
|
+
autorequire:
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-12-
|
17
|
+
date: 2010-12-20 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -72,7 +72,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
72
72
|
- 0
|
73
73
|
version: "0"
|
74
74
|
requirements:
|
75
|
-
- none
|
76
75
|
- MeCab, 0.98 or greater
|
77
76
|
rubyforge_project:
|
78
77
|
rubygems_version: 1.3.7
|