mecab-light 0.2.5 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -10
- data/Rakefile +4 -5
- data/ext/mecab/{light/extconf.rb → extconf.rb} +2 -2
- data/ext/mecab/light.c +227 -0
- data/mecab-light.gemspec +21 -10
- metadata +17 -58
- data/ext/mecab/light/binding.c +0 -50
- data/lib/mecab/light/morpheme.rb +0 -18
- data/lib/mecab/light/result.rb +0 -51
- data/lib/mecab/light/tagger.rb +0 -18
- data/lib/mecab/light/version.rb +0 -5
- data/lib/mecab/light.rb +0 -5
- data/spec/mecab-light-morpheme_spec.rb +0 -92
- data/spec/mecab-light-result_spec.rb +0 -147
- data/spec/mecab-light-tagger_spec.rb +0 -37
- data/spec/spec_helper.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 905b46000c4f3aa2da1252ee10f01f5cd44cfc6a
|
4
|
+
data.tar.gz: ffde14f8a056e8dfa9dde57900ca6e08fc6da0ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cf53d9d1d3e27cd74531190423193cba4ddae9ed334ae3b61250ab92038eeab789b671ee64d32f47034bcb77d2acfc0072a0adec109b17a19ac8ce5f6b9d686
|
7
|
+
data.tar.gz: 902dc3c3a94f117f8bb548300b6c4c151fa8d9d3dae6698f37a320a56b13c92f3b40f4a19fad02cec65018208223b048943ffd6b93575bec0a7555f315bfdd03
|
data/README.md
CHANGED
@@ -1,11 +1,5 @@
|
|
1
1
|
# MeCab::Light
|
2
2
|
|
3
|
-
[![Gem Version](https://badge.fury.io/rb/mecab-light.png)][gem]
|
4
|
-
[![Build Status](https://travis-ci.org/hadzimme/mecab-light.png)][travis]
|
5
|
-
[![Coverage Status](https://coveralls.io/repos/hadzimme/mecab-light/badge.png?branch=master)][coveralls]
|
6
|
-
[![Code Climate](https://codeclimate.com/github/hadzimme/mecab-light.png)][codeclimate]
|
7
|
-
[![Dependency Status](https://gemnasium.com/hadzimme/mecab-light.png)][gemnasium]
|
8
|
-
|
9
3
|
[gem]: http://badge.fury.io/rb/mecab-light
|
10
4
|
[travis]: https://travis-ci.org/hadzimme/mecab-light
|
11
5
|
[coveralls]: https://coveralls.io/r/hadzimme/mecab-light?branch=master
|
@@ -28,24 +22,34 @@ Or install it yourself as:
|
|
28
22
|
|
29
23
|
$ gem install mecab-light
|
30
24
|
|
25
|
+
Install on Windows:
|
26
|
+
|
27
|
+
$ gem install mecab-light -- --with-mecab-folder=C:/MeCab # assign yours
|
28
|
+
|
31
29
|
## Usage
|
32
30
|
|
33
31
|
```ruby
|
34
32
|
require 'mecab/light'
|
35
33
|
|
36
|
-
tagger = MeCab::Light::Tagger.new
|
34
|
+
tagger = MeCab::Light::Tagger.new('')
|
37
35
|
string = 'この文を形態素解析してください。'
|
38
36
|
result = tagger.parse(string)
|
39
|
-
result[0].surface #=> "この"
|
40
37
|
result.kind_of?(Enumerable) #=> true
|
41
38
|
result.map(&:surface)
|
42
39
|
#=> ["この", "文", "を", "形態素", "解析", "し", "て", "ください", "。"]
|
40
|
+
|
41
|
+
model = MeCab::Light::Model.new('')
|
42
|
+
tagger = MeCab::Light::Tagger.new(model)
|
43
|
+
lattice = MeCab::Light::Lattice.new(model)
|
44
|
+
lattice.sentence = 'この文を形態素解析してください。'
|
45
|
+
result = tagger.parse(lattice)
|
46
|
+
result.map(&:surface)
|
47
|
+
#=> ["この", "文", "を", "形態素", "解析", "し", "て", "ください", "。"]
|
43
48
|
```
|
44
49
|
|
45
50
|
MeCab::Light is a lightweight tool.
|
46
51
|
This gem works without the official binding.
|
47
|
-
|
48
|
-
Note that the method's feature is totally different from its original.
|
52
|
+
Note that this gem currently supports fewer methods than the MeCab C API.
|
49
53
|
|
50
54
|
## Contributing
|
51
55
|
|
data/Rakefile
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
|
3
3
|
Bundler.setup
|
4
|
-
require 'rspec/core/rake_task'
|
5
4
|
|
6
|
-
desc '
|
7
|
-
|
8
|
-
|
5
|
+
desc 'Run test'
|
6
|
+
|
7
|
+
task :test do
|
9
8
|
end
|
10
9
|
|
11
|
-
task :default => :
|
10
|
+
task :default => :test
|
data/ext/mecab/{light/extconf.rb → extconf.rb}
@@ -4,10 +4,10 @@ if mecab_dir = arg_config('--with-mecab-folder')
|
|
4
4
|
sdk_dir = File.join(mecab_dir, 'sdk')
|
5
5
|
bin_dir = File.join(mecab_dir, 'bin')
|
6
6
|
if find_header('mecab.h', sdk_dir) && find_library('mecab', nil, bin_dir)
|
7
|
-
create_makefile('mecab/light
|
7
|
+
create_makefile('mecab/light')
|
8
8
|
end
|
9
9
|
else
|
10
10
|
if have_header('mecab.h') && have_library('mecab')
|
11
|
-
create_makefile('mecab/light
|
11
|
+
create_makefile('mecab/light')
|
12
12
|
end
|
13
13
|
end
|
data/ext/mecab/light.c
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
3
|
+
#include <mecab.h>
|
4
|
+
|
5
|
+
#define MECAB_LIGHT_MAJOR_VERSION 1
|
6
|
+
#define MECAB_LIGHT_MINOR_VERSION 0
|
7
|
+
#define MECAB_LIGHT_PATCH_VERSION 0
|
8
|
+
|
9
|
+
typedef struct {
|
10
|
+
mecab_model_t* ptr;
|
11
|
+
} Model;
|
12
|
+
|
13
|
+
typedef struct {
|
14
|
+
mecab_t* ptr;
|
15
|
+
} Tagger;
|
16
|
+
|
17
|
+
typedef struct {
|
18
|
+
mecab_lattice_t* ptr;
|
19
|
+
rb_encoding* enc;
|
20
|
+
} Lattice;
|
21
|
+
|
22
|
+
typedef struct {
|
23
|
+
const mecab_node_t* ptr;
|
24
|
+
rb_encoding* enc;
|
25
|
+
} Node;
|
26
|
+
|
27
|
+
typedef struct {
|
28
|
+
const mecab_node_t* bos_node;
|
29
|
+
rb_encoding* enc;
|
30
|
+
} Result;
|
31
|
+
|
32
|
+
static VALUE
|
33
|
+
name_space()
|
34
|
+
{
|
35
|
+
VALUE rb_mMeCab = rb_define_module("MeCab");
|
36
|
+
return rb_define_module_under(rb_mMeCab, "Light");
|
37
|
+
}
|
38
|
+
|
39
|
+
static void
|
40
|
+
model_free(Model* model)
|
41
|
+
{
|
42
|
+
mecab_model_destroy(model->ptr);
|
43
|
+
}
|
44
|
+
|
45
|
+
static VALUE
|
46
|
+
model_alloc(VALUE klass)
|
47
|
+
{
|
48
|
+
Model* model = ALLOC(Model);
|
49
|
+
return Data_Wrap_Struct(klass, 0, model_free, model);
|
50
|
+
}
|
51
|
+
|
52
|
+
static void
|
53
|
+
tagger_free(Tagger* tagger)
|
54
|
+
{
|
55
|
+
mecab_destroy(tagger->ptr);
|
56
|
+
}
|
57
|
+
|
58
|
+
static VALUE
|
59
|
+
tagger_alloc(VALUE klass)
|
60
|
+
{
|
61
|
+
Tagger* tagger = ALLOC(Tagger);
|
62
|
+
return Data_Wrap_Struct(klass, 0, tagger_free, tagger);
|
63
|
+
}
|
64
|
+
|
65
|
+
static void
|
66
|
+
lattice_free(Lattice* lattice)
|
67
|
+
{
|
68
|
+
mecab_lattice_destroy(lattice->ptr);
|
69
|
+
}
|
70
|
+
|
71
|
+
static VALUE
|
72
|
+
lattice_alloc(VALUE klass)
|
73
|
+
{
|
74
|
+
Lattice* lattice = ALLOC(Lattice);
|
75
|
+
return Data_Wrap_Struct(klass, 0, lattice_free, lattice);
|
76
|
+
}
|
77
|
+
|
78
|
+
static VALUE
|
79
|
+
rb_model_initialize(VALUE self, VALUE arg)
|
80
|
+
{
|
81
|
+
Model* model;
|
82
|
+
|
83
|
+
Data_Get_Struct(self, Model, model);
|
84
|
+
model->ptr = mecab_model_new2(RSTRING_PTR(arg));
|
85
|
+
return Qnil;
|
86
|
+
}
|
87
|
+
|
88
|
+
static VALUE
|
89
|
+
rb_tagger_initialize(VALUE self, VALUE arg)
|
90
|
+
{
|
91
|
+
Tagger* tagger;
|
92
|
+
Model* model;
|
93
|
+
VALUE class_of_arg, rb_cModel;
|
94
|
+
|
95
|
+
Data_Get_Struct(self, Tagger, tagger);
|
96
|
+
rb_cModel = rb_define_class_under(name_space(), "Model", rb_cObject);
|
97
|
+
class_of_arg = CLASS_OF(arg);
|
98
|
+
if (class_of_arg == rb_cString) {
|
99
|
+
tagger->ptr = mecab_new2(RSTRING_PTR(arg));
|
100
|
+
} else if (class_of_arg == rb_cModel) {
|
101
|
+
Data_Get_Struct(arg, Model, model);
|
102
|
+
tagger->ptr = mecab_model_new_tagger(model->ptr);
|
103
|
+
} else {
|
104
|
+
rb_raise(rb_eTypeError, "The argument should be String or MeCab::Light::Model");
|
105
|
+
}
|
106
|
+
return Qnil;
|
107
|
+
}
|
108
|
+
|
109
|
+
static VALUE
|
110
|
+
rb_tagger_parse(VALUE self, VALUE arg)
|
111
|
+
{
|
112
|
+
Tagger* tagger;
|
113
|
+
Lattice* lattice;
|
114
|
+
Result* result = ALLOC(Result);
|
115
|
+
VALUE class_of_arg, rb_cLattice, rb_cResult;
|
116
|
+
|
117
|
+
Data_Get_Struct(self, Tagger, tagger);
|
118
|
+
rb_cLattice = rb_define_class_under(name_space(), "Lattice", rb_cObject);
|
119
|
+
class_of_arg = CLASS_OF(arg);
|
120
|
+
if (class_of_arg == rb_cString) {
|
121
|
+
result->bos_node = mecab_sparse_tonode(tagger->ptr, RSTRING_PTR(arg));
|
122
|
+
result->enc = rb_enc_get(arg);
|
123
|
+
} else if (class_of_arg == rb_cLattice) {
|
124
|
+
Data_Get_Struct(arg, Lattice, lattice);
|
125
|
+
mecab_parse_lattice(tagger->ptr, lattice->ptr);
|
126
|
+
result->bos_node = mecab_lattice_get_bos_node(lattice->ptr);
|
127
|
+
result->enc = lattice->enc;
|
128
|
+
} else {
|
129
|
+
rb_raise(rb_eTypeError, "The argument should be String or MeCab::Light::Lattice");
|
130
|
+
}
|
131
|
+
rb_cResult = rb_define_class_under(name_space(), "Result", rb_cObject);
|
132
|
+
return Data_Wrap_Struct(rb_cResult, 0, 0, result);
|
133
|
+
}
|
134
|
+
|
135
|
+
static VALUE
|
136
|
+
rb_lattice_initialize(VALUE self, VALUE rb_model)
|
137
|
+
{
|
138
|
+
Lattice* lattice;
|
139
|
+
Model* model;
|
140
|
+
|
141
|
+
Data_Get_Struct(self, Lattice, lattice);
|
142
|
+
Data_Get_Struct(rb_model, Model, model);
|
143
|
+
lattice->ptr = mecab_model_new_lattice(model->ptr);
|
144
|
+
return Qnil;
|
145
|
+
}
|
146
|
+
|
147
|
+
static VALUE
|
148
|
+
rb_lattice_set_sentence(VALUE self, VALUE str)
|
149
|
+
{
|
150
|
+
Lattice* lattice;
|
151
|
+
|
152
|
+
Data_Get_Struct(self, Lattice, lattice);
|
153
|
+
mecab_lattice_set_sentence(lattice->ptr, RSTRING_PTR(str));
|
154
|
+
lattice->enc = rb_enc_get(str);
|
155
|
+
return str;
|
156
|
+
}
|
157
|
+
|
158
|
+
static VALUE
|
159
|
+
result_enum_length(VALUE self, VALUE args, VALUE eobj)
|
160
|
+
{
|
161
|
+
return rb_funcall(self, rb_intern("count"), 0);
|
162
|
+
}
|
163
|
+
|
164
|
+
static VALUE
|
165
|
+
rb_result_each(VALUE self)
|
166
|
+
{
|
167
|
+
Result* result;
|
168
|
+
Node* node;
|
169
|
+
VALUE rb_cNode;
|
170
|
+
|
171
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, result_enum_length);
|
172
|
+
Data_Get_Struct(self, Result, result);
|
173
|
+
node = ALLOC(Node);
|
174
|
+
node->ptr = result->bos_node->next;
|
175
|
+
node->enc = result->enc;
|
176
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
177
|
+
for (; node->ptr->next; node->ptr = node->ptr->next) {
|
178
|
+
rb_yield(Data_Wrap_Struct(rb_cNode, 0, 0, node));
|
179
|
+
}
|
180
|
+
return self;
|
181
|
+
}
|
182
|
+
|
183
|
+
static VALUE
|
184
|
+
rb_node_get_surface(VALUE self)
|
185
|
+
{
|
186
|
+
Node* node;
|
187
|
+
VALUE surface;
|
188
|
+
|
189
|
+
Data_Get_Struct(self, Node, node);
|
190
|
+
surface = rb_str_new(node->ptr->surface, node->ptr->length);
|
191
|
+
return rb_enc_associate(surface, node->enc);
|
192
|
+
}
|
193
|
+
|
194
|
+
static VALUE
|
195
|
+
rb_node_get_feature(VALUE self)
|
196
|
+
{
|
197
|
+
Node* node;
|
198
|
+
VALUE feature;
|
199
|
+
|
200
|
+
Data_Get_Struct(self, Node, node);
|
201
|
+
feature = rb_str_new2(node->ptr->feature);
|
202
|
+
return rb_enc_associate(feature, node->enc);
|
203
|
+
}
|
204
|
+
|
205
|
+
void
|
206
|
+
Init_light()
|
207
|
+
{
|
208
|
+
VALUE rb_cModel, rb_cLattice, rb_cTagger, rb_cNode, rb_cResult;
|
209
|
+
|
210
|
+
rb_cModel = rb_define_class_under(name_space(), "Model", rb_cObject);
|
211
|
+
rb_cTagger = rb_define_class_under(name_space(), "Tagger", rb_cObject);
|
212
|
+
rb_cLattice = rb_define_class_under(name_space(), "Lattice", rb_cObject);
|
213
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
214
|
+
rb_cResult = rb_define_class_under(name_space(), "Result", rb_cObject);
|
215
|
+
rb_define_alloc_func(rb_cModel, model_alloc);
|
216
|
+
rb_define_alloc_func(rb_cTagger, tagger_alloc);
|
217
|
+
rb_define_alloc_func(rb_cLattice, lattice_alloc);
|
218
|
+
rb_define_private_method(rb_cModel, "initialize", rb_model_initialize, 1);
|
219
|
+
rb_define_private_method(rb_cTagger, "initialize", rb_tagger_initialize, 1);
|
220
|
+
rb_define_private_method(rb_cLattice, "initialize", rb_lattice_initialize, 1);
|
221
|
+
rb_define_method(rb_cTagger, "parse", rb_tagger_parse, 1);
|
222
|
+
rb_define_method(rb_cLattice, "sentence=", rb_lattice_set_sentence, 1);
|
223
|
+
rb_define_method(rb_cResult, "each", rb_result_each, 0);
|
224
|
+
rb_define_method(rb_cNode, "surface", rb_node_get_surface, 0);
|
225
|
+
rb_define_method(rb_cNode, "feature", rb_node_get_feature, 0);
|
226
|
+
rb_include_module(rb_cResult, rb_mEnumerable);
|
227
|
+
}
|
data/mecab-light.gemspec
CHANGED
@@ -1,11 +1,24 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
base_dir = File.dirname(__FILE__)
|
2
|
+
ext_dir = File.join(base_dir, 'ext', 'mecab')
|
3
|
+
|
4
|
+
guess_version = lambda do |ext_dir|
|
5
|
+
version = {}
|
6
|
+
|
7
|
+
File.open(File.join(ext_dir, 'light.c')) do |light_c|
|
8
|
+
light_c.each_line do |line|
|
9
|
+
case line
|
10
|
+
when /\A#define MECAB_LIGHT_([A-Z]+)_VERSION (\d+)/
|
11
|
+
version[$1.intern] = $2
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
[version[:MAJOR], version[:MINOR], version[:PATCH]].join('.')
|
17
|
+
end
|
5
18
|
|
6
19
|
Gem::Specification.new do |gem|
|
7
20
|
gem.name = "mecab-light"
|
8
|
-
gem.version =
|
21
|
+
gem.version = guess_version.call(ext_dir)
|
9
22
|
gem.authors = ["Hajime Wakahara"]
|
10
23
|
gem.email = ["hadzimme@icloud.com"]
|
11
24
|
gem.description = %q{Use a sequence of morphemes as an Enumerable object.}
|
@@ -16,11 +29,9 @@ Gem::Specification.new do |gem|
|
|
16
29
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
30
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
31
|
gem.require_paths = ["lib"]
|
19
|
-
gem.required_ruby_version = '>=
|
20
|
-
gem.extensions << 'ext/mecab/
|
32
|
+
gem.required_ruby_version = '>= 2.0'
|
33
|
+
gem.extensions << 'ext/mecab/extconf.rb'
|
21
34
|
|
22
35
|
gem.add_development_dependency 'rake'
|
23
|
-
gem.add_development_dependency '
|
24
|
-
gem.add_development_dependency 'simplecov'
|
25
|
-
gem.add_development_dependency 'coveralls'
|
36
|
+
gem.add_development_dependency 'test-unit'
|
26
37
|
end
|
metadata
CHANGED
@@ -1,69 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mecab-light
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hajime Wakahara
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: test-unit
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: simplecov
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - '>='
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - '>='
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: coveralls
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - '>='
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - '>='
|
38
|
+
- - ">="
|
67
39
|
- !ruby/object:Gem::Version
|
68
40
|
version: '0'
|
69
41
|
description: Use a sequence of morphemes as an Enumerable object.
|
@@ -71,27 +43,18 @@ email:
|
|
71
43
|
- hadzimme@icloud.com
|
72
44
|
executables: []
|
73
45
|
extensions:
|
74
|
-
- ext/mecab/
|
46
|
+
- ext/mecab/extconf.rb
|
75
47
|
extra_rdoc_files: []
|
76
48
|
files:
|
77
|
-
- .gitignore
|
78
|
-
- .travis.yml
|
49
|
+
- ".gitignore"
|
50
|
+
- ".travis.yml"
|
79
51
|
- Gemfile
|
80
52
|
- LICENSE.txt
|
81
53
|
- README.md
|
82
54
|
- Rakefile
|
83
|
-
- ext/mecab/
|
84
|
-
- ext/mecab/light
|
85
|
-
- lib/mecab/light.rb
|
86
|
-
- lib/mecab/light/morpheme.rb
|
87
|
-
- lib/mecab/light/result.rb
|
88
|
-
- lib/mecab/light/tagger.rb
|
89
|
-
- lib/mecab/light/version.rb
|
55
|
+
- ext/mecab/extconf.rb
|
56
|
+
- ext/mecab/light.c
|
90
57
|
- mecab-light.gemspec
|
91
|
-
- spec/mecab-light-morpheme_spec.rb
|
92
|
-
- spec/mecab-light-result_spec.rb
|
93
|
-
- spec/mecab-light-tagger_spec.rb
|
94
|
-
- spec/spec_helper.rb
|
95
58
|
homepage: https://github.com/hadzimme/mecab-light
|
96
59
|
licenses: []
|
97
60
|
metadata: {}
|
@@ -101,22 +64,18 @@ require_paths:
|
|
101
64
|
- lib
|
102
65
|
required_ruby_version: !ruby/object:Gem::Requirement
|
103
66
|
requirements:
|
104
|
-
- -
|
67
|
+
- - ">="
|
105
68
|
- !ruby/object:Gem::Version
|
106
|
-
version: '
|
69
|
+
version: '2.0'
|
107
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
71
|
requirements:
|
109
|
-
- -
|
72
|
+
- - ">="
|
110
73
|
- !ruby/object:Gem::Version
|
111
74
|
version: '0'
|
112
75
|
requirements: []
|
113
76
|
rubyforge_project:
|
114
|
-
rubygems_version: 2.
|
77
|
+
rubygems_version: 2.3.0
|
115
78
|
signing_key:
|
116
79
|
specification_version: 4
|
117
80
|
summary: A simple interface for MeCab (UNOFFICIAL)
|
118
|
-
test_files:
|
119
|
-
- spec/mecab-light-morpheme_spec.rb
|
120
|
-
- spec/mecab-light-result_spec.rb
|
121
|
-
- spec/mecab-light-tagger_spec.rb
|
122
|
-
- spec/spec_helper.rb
|
81
|
+
test_files: []
|
data/ext/mecab/light/binding.c
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
#include <ruby.h>
|
2
|
-
#include <ruby/encoding.h>
|
3
|
-
#include <mecab.h>
|
4
|
-
|
5
|
-
typedef struct {
|
6
|
-
mecab_t* ptr;
|
7
|
-
} MeCab;
|
8
|
-
|
9
|
-
static VALUE
|
10
|
-
mecab_alloc(VALUE klass)
|
11
|
-
{
|
12
|
-
MeCab* mecab = ALLOC(MeCab);
|
13
|
-
return Data_Wrap_Struct(klass, 0, 0, mecab);
|
14
|
-
}
|
15
|
-
|
16
|
-
static VALUE
|
17
|
-
rb_mecab_initialize(VALUE self, VALUE arg)
|
18
|
-
{
|
19
|
-
MeCab* mecab;
|
20
|
-
|
21
|
-
Data_Get_Struct(self, MeCab, mecab);
|
22
|
-
mecab->ptr = mecab_new2(RSTRING_PTR(arg));
|
23
|
-
return Qnil;
|
24
|
-
}
|
25
|
-
|
26
|
-
static VALUE
|
27
|
-
rb_mecab_parse_to_s(VALUE self, VALUE str)
|
28
|
-
{
|
29
|
-
MeCab* mecab;
|
30
|
-
const char* result;
|
31
|
-
rb_encoding* enc;
|
32
|
-
|
33
|
-
Data_Get_Struct(self, MeCab, mecab);
|
34
|
-
enc = rb_enc_get(str);
|
35
|
-
result = mecab_sparse_tostr(mecab->ptr, RSTRING_PTR(str));
|
36
|
-
return rb_enc_associate(rb_str_new2(result), enc);
|
37
|
-
}
|
38
|
-
|
39
|
-
void
|
40
|
-
Init_binding()
|
41
|
-
{
|
42
|
-
VALUE rb_mMeCab, rb_mLight, rb_cBinding;
|
43
|
-
|
44
|
-
rb_mMeCab = rb_define_module("MeCab");
|
45
|
-
rb_mLight = rb_define_module_under(rb_mMeCab, "Light");
|
46
|
-
rb_cBinding = rb_define_class_under(rb_mLight, "Binding", rb_cObject);
|
47
|
-
rb_define_alloc_func(rb_cBinding, mecab_alloc);
|
48
|
-
rb_define_private_method(rb_cBinding, "initialize", rb_mecab_initialize, 1);
|
49
|
-
rb_define_method(rb_cBinding, "parse_to_s", rb_mecab_parse_to_s, 1);
|
50
|
-
}
|
data/lib/mecab/light/morpheme.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
module MeCab
|
2
|
-
module Light
|
3
|
-
class Morpheme
|
4
|
-
def initialize(line)
|
5
|
-
@to_s = line.chomp
|
6
|
-
@surface, @feature = @to_s.split(/\t/)
|
7
|
-
end
|
8
|
-
|
9
|
-
alias to_s_orig to_s
|
10
|
-
private :to_s_orig
|
11
|
-
attr_reader :surface, :feature, :to_s
|
12
|
-
|
13
|
-
def inspect
|
14
|
-
to_s_orig.sub(/>$/, " #{@to_s}>")
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/mecab/light/result.rb
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
module MeCab
|
2
|
-
module Light
|
3
|
-
class Result
|
4
|
-
include Enumerable
|
5
|
-
|
6
|
-
def initialize(parsed)
|
7
|
-
@to_s = parsed
|
8
|
-
@morphemes = []
|
9
|
-
|
10
|
-
parsed.each_line do |line|
|
11
|
-
@morphemes << Morpheme.new(line)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def to_one_line
|
16
|
-
@morphemes.map do |morpheme|
|
17
|
-
morpheme.surface
|
18
|
-
end.join(' ')
|
19
|
-
end
|
20
|
-
|
21
|
-
alias to_s_orig to_s
|
22
|
-
private :to_one_line, :to_s_orig
|
23
|
-
attr_reader :to_s
|
24
|
-
|
25
|
-
def inspect
|
26
|
-
to_s_orig.sub(/>$/, " #{to_one_line}>")
|
27
|
-
end
|
28
|
-
|
29
|
-
def each(&block)
|
30
|
-
if block_given?
|
31
|
-
@morphemes.each(&block)
|
32
|
-
self
|
33
|
-
else
|
34
|
-
self.to_enum { @morphemes.size }
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def [](nth)
|
39
|
-
@morphemes[nth]
|
40
|
-
end
|
41
|
-
|
42
|
-
alias at []
|
43
|
-
|
44
|
-
def size
|
45
|
-
@morphemes.size
|
46
|
-
end
|
47
|
-
|
48
|
-
alias length size
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
data/lib/mecab/light/tagger.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
module MeCab
|
2
|
-
module Light
|
3
|
-
class Tagger
|
4
|
-
def initialize
|
5
|
-
@mecab = Binding.new('')
|
6
|
-
end
|
7
|
-
|
8
|
-
def parse(string)
|
9
|
-
Result.new(parse_to_s(string))
|
10
|
-
end
|
11
|
-
|
12
|
-
private
|
13
|
-
def parse_to_s(string)
|
14
|
-
@mecab.parse_to_s(string).sub(/EOS\n$/, '')
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/mecab/light/version.rb
DELETED
data/lib/mecab/light.rb
DELETED
data/spec/mecab-light-morpheme_spec.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe MeCab::Light::Morpheme do
|
4
|
-
subject do
|
5
|
-
MeCab::Light::Morpheme.new(line)
|
6
|
-
end
|
7
|
-
|
8
|
-
context 'initialized with "surface\tfeature\n"' do
|
9
|
-
let :line do
|
10
|
-
"surface\tfeature\n"
|
11
|
-
end
|
12
|
-
|
13
|
-
specify do
|
14
|
-
expect(subject).to respond_to(:surface).with(0).arguments
|
15
|
-
end
|
16
|
-
|
17
|
-
specify do
|
18
|
-
expect(subject).to respond_to(:feature).with(0).arguments
|
19
|
-
end
|
20
|
-
|
21
|
-
describe :surface do
|
22
|
-
let :surface do
|
23
|
-
subject.surface
|
24
|
-
end
|
25
|
-
|
26
|
-
specify do
|
27
|
-
expect(surface).to eq('surface')
|
28
|
-
end
|
29
|
-
|
30
|
-
describe :encoding do
|
31
|
-
let :encoding do
|
32
|
-
surface.encoding
|
33
|
-
end
|
34
|
-
|
35
|
-
specify do
|
36
|
-
expect(encoding).to eq(Encoding::UTF_8)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe :feature do
|
42
|
-
let :feature do
|
43
|
-
subject.feature
|
44
|
-
end
|
45
|
-
|
46
|
-
specify do
|
47
|
-
expect(feature).to eq('feature')
|
48
|
-
end
|
49
|
-
|
50
|
-
describe :encoding do
|
51
|
-
let :encoding do
|
52
|
-
feature.encoding
|
53
|
-
end
|
54
|
-
|
55
|
-
specify do
|
56
|
-
expect(encoding).to eq(Encoding::UTF_8)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
describe :to_s do
|
62
|
-
let :to_s do
|
63
|
-
subject.to_s
|
64
|
-
end
|
65
|
-
|
66
|
-
specify do
|
67
|
-
expect(to_s).to eq("surface\tfeature")
|
68
|
-
end
|
69
|
-
|
70
|
-
describe :encoding do
|
71
|
-
let :encoding do
|
72
|
-
to_s.encoding
|
73
|
-
end
|
74
|
-
|
75
|
-
specify do
|
76
|
-
expect(encoding).to eq(Encoding::UTF_8)
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
describe :inspect do
|
82
|
-
let :inspect do
|
83
|
-
subject.inspect
|
84
|
-
end
|
85
|
-
|
86
|
-
specify do
|
87
|
-
pattern = /^#<MeCab::Light::Morpheme:\w+ surface\tfeature>$/
|
88
|
-
expect(inspect).to match(pattern)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
data/spec/mecab-light-result_spec.rb
DELETED
@@ -1,147 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe MeCab::Light::Result do
|
4
|
-
before do
|
5
|
-
MeCab::Light::Morpheme.stub(:new).and_return(morpheme)
|
6
|
-
end
|
7
|
-
|
8
|
-
let :morpheme do
|
9
|
-
double(MeCab::Light::Morpheme,
|
10
|
-
surface: 'surface',
|
11
|
-
feature: 'feature')
|
12
|
-
end
|
13
|
-
|
14
|
-
subject do
|
15
|
-
MeCab::Light::Result.new(parsed)
|
16
|
-
end
|
17
|
-
|
18
|
-
context 'initialized with "surface\tfeature\n"' do
|
19
|
-
let :parsed do
|
20
|
-
"surface\tfeature\n"
|
21
|
-
end
|
22
|
-
|
23
|
-
specify do
|
24
|
-
expect(subject).to respond_to(:each).with(0).arguments
|
25
|
-
end
|
26
|
-
|
27
|
-
specify do
|
28
|
-
expect(subject).to be_an(Enumerable)
|
29
|
-
end
|
30
|
-
|
31
|
-
describe :each do
|
32
|
-
let :each do
|
33
|
-
subject.each(&block)
|
34
|
-
end
|
35
|
-
|
36
|
-
context 'with block' do
|
37
|
-
let :block do
|
38
|
-
lambda { |morpheme| }
|
39
|
-
end
|
40
|
-
|
41
|
-
it 'should return self' do
|
42
|
-
expect(each).to eq(subject)
|
43
|
-
end
|
44
|
-
|
45
|
-
specify do
|
46
|
-
expect { |b| subject.each(&b) }.to yield_control
|
47
|
-
end
|
48
|
-
|
49
|
-
it 'should yield with args(MeCab::Light::Morpheme)' do
|
50
|
-
expect { |b| subject.each(&b) }.to yield_with_args(morpheme)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
context 'without block' do
|
55
|
-
let :block do
|
56
|
-
nil
|
57
|
-
end
|
58
|
-
|
59
|
-
specify do
|
60
|
-
expect(each).to be_an_instance_of(Enumerator)
|
61
|
-
end
|
62
|
-
|
63
|
-
describe :size do
|
64
|
-
let :size do
|
65
|
-
each.size
|
66
|
-
end
|
67
|
-
|
68
|
-
specify do
|
69
|
-
expect(size).to eq(1)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
describe :count do
|
76
|
-
let :count do
|
77
|
-
subject.count
|
78
|
-
end
|
79
|
-
|
80
|
-
specify do
|
81
|
-
expect(count).to eq(1)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
describe :size do
|
86
|
-
let :size do
|
87
|
-
subject.size
|
88
|
-
end
|
89
|
-
|
90
|
-
specify do
|
91
|
-
expect(size).to eq(1)
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
describe :length do
|
96
|
-
let :length do
|
97
|
-
subject.length
|
98
|
-
end
|
99
|
-
|
100
|
-
specify do
|
101
|
-
expect(length).to eq(1)
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
describe :[] do
|
106
|
-
let :at_literal do
|
107
|
-
subject[nth]
|
108
|
-
end
|
109
|
-
|
110
|
-
context 'with 0' do
|
111
|
-
let :nth do
|
112
|
-
0
|
113
|
-
end
|
114
|
-
|
115
|
-
it 'should be an instance of Morpheme' do
|
116
|
-
expect(at_literal).to eq(morpheme)
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
describe :at do
|
122
|
-
let :at do
|
123
|
-
subject.at(nth)
|
124
|
-
end
|
125
|
-
|
126
|
-
context 'with 0' do
|
127
|
-
let :nth do
|
128
|
-
0
|
129
|
-
end
|
130
|
-
|
131
|
-
it 'should be an instance of Morpheme' do
|
132
|
-
expect(at).to eq(morpheme)
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
describe :inspect do
|
138
|
-
let :inspect do
|
139
|
-
subject.inspect
|
140
|
-
end
|
141
|
-
|
142
|
-
specify do
|
143
|
-
expect(inspect).to match(/^#<MeCab::Light::Result:\w+ surface>$/)
|
144
|
-
end
|
145
|
-
end
|
146
|
-
end
|
147
|
-
end
|
data/spec/mecab-light-tagger_spec.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe MeCab::Light::Tagger do
|
4
|
-
before do
|
5
|
-
MeCab::Light::Binding.stub(:new).and_return(binding)
|
6
|
-
MeCab::Light::Result.stub(:new).and_return(result)
|
7
|
-
end
|
8
|
-
|
9
|
-
let :binding do
|
10
|
-
double(MeCab::Light::Binding,
|
11
|
-
parse_to_s: "surface\tfeature\nEOS\n")
|
12
|
-
end
|
13
|
-
|
14
|
-
let :result do
|
15
|
-
double(MeCab::Light::Result)
|
16
|
-
end
|
17
|
-
|
18
|
-
specify do
|
19
|
-
expect(subject).to respond_to(:parse).with(1).argument
|
20
|
-
end
|
21
|
-
|
22
|
-
describe :parse do
|
23
|
-
let :parse do
|
24
|
-
subject.parse(string)
|
25
|
-
end
|
26
|
-
|
27
|
-
context 'with "surface"' do
|
28
|
-
let :string do
|
29
|
-
'surface'
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'should be an instance of MeCab::Light::Result' do
|
33
|
-
expect(parse).to eq(result)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
data/spec/spec_helper.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
require 'coveralls'
|
3
|
-
|
4
|
-
Coveralls.wear!
|
5
|
-
formatters = [
|
6
|
-
SimpleCov::Formatter::HTMLFormatter,
|
7
|
-
Coveralls::SimpleCov::Formatter,
|
8
|
-
]
|
9
|
-
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[*formatters]
|
10
|
-
|
11
|
-
SimpleCov.start do
|
12
|
-
add_filter 'spec'
|
13
|
-
end
|
14
|
-
|
15
|
-
$:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
|
-
require 'mecab/light/morpheme'
|
17
|
-
require 'mecab/light/result'
|
18
|
-
require 'mecab/light/tagger'
|
19
|
-
|
20
|
-
|
21
|
-
class MeCab::Light::Binding
|
22
|
-
end
|