mecab-light 0.2.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -10
- data/Rakefile +4 -5
- data/ext/mecab/{light/extconf.rb → extconf.rb} +2 -2
- data/ext/mecab/light.c +227 -0
- data/mecab-light.gemspec +21 -10
- metadata +17 -58
- data/ext/mecab/light/binding.c +0 -50
- data/lib/mecab/light/morpheme.rb +0 -18
- data/lib/mecab/light/result.rb +0 -51
- data/lib/mecab/light/tagger.rb +0 -18
- data/lib/mecab/light/version.rb +0 -5
- data/lib/mecab/light.rb +0 -5
- data/spec/mecab-light-morpheme_spec.rb +0 -92
- data/spec/mecab-light-result_spec.rb +0 -147
- data/spec/mecab-light-tagger_spec.rb +0 -37
- data/spec/spec_helper.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 905b46000c4f3aa2da1252ee10f01f5cd44cfc6a
|
4
|
+
data.tar.gz: ffde14f8a056e8dfa9dde57900ca6e08fc6da0ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cf53d9d1d3e27cd74531190423193cba4ddae9ed334ae3b61250ab92038eeab789b671ee64d32f47034bcb77d2acfc0072a0adec109b17a19ac8ce5f6b9d686
|
7
|
+
data.tar.gz: 902dc3c3a94f117f8bb548300b6c4c151fa8d9d3dae6698f37a320a56b13c92f3b40f4a19fad02cec65018208223b048943ffd6b93575bec0a7555f315bfdd03
|
data/README.md
CHANGED
@@ -1,11 +1,5 @@
|
|
1
1
|
# MeCab::Light
|
2
2
|
|
3
|
-
[][gem]
|
4
|
-
[][travis]
|
5
|
-
[][coveralls]
|
6
|
-
[][codeclimate]
|
7
|
-
[][gemnasium]
|
8
|
-
|
9
3
|
[gem]: http://badge.fury.io/rb/mecab-light
|
10
4
|
[travis]: https://travis-ci.org/hadzimme/mecab-light
|
11
5
|
[coveralls]: https://coveralls.io/r/hadzimme/mecab-light?branch=master
|
@@ -28,24 +22,34 @@ Or install it yourself as:
|
|
28
22
|
|
29
23
|
$ gem install mecab-light
|
30
24
|
|
25
|
+
Install on Windows:
|
26
|
+
|
27
|
+
$ gem install mecab-light -- --with-mecab-folder=C:/MeCab # assign yours
|
28
|
+
|
31
29
|
## Usage
|
32
30
|
|
33
31
|
```ruby
|
34
32
|
require 'mecab/light'
|
35
33
|
|
36
|
-
tagger = MeCab::Light::Tagger.new
|
34
|
+
tagger = MeCab::Light::Tagger.new('')
|
37
35
|
string = 'この文を形態素解析してください。'
|
38
36
|
result = tagger.parse(string)
|
39
|
-
result[0].surface #=> "この"
|
40
37
|
result.kind_of?(Enumerable) #=> true
|
41
38
|
result.map(&:surface)
|
42
39
|
#=> ["この", "文", "を", "形態素", "解析", "し", "て", "ください", "。"]
|
40
|
+
|
41
|
+
model = MeCab::Light::Model.new('')
|
42
|
+
tagger = MeCab::Light::Tagger.new(model)
|
43
|
+
lattice = MeCab::Light::Lattice.new(model)
|
44
|
+
lattice.sentence = 'この文を形態素解析してください。'
|
45
|
+
result = tagger.parse(lattice)
|
46
|
+
result.map(&:surface)
|
47
|
+
#=> ["この", "文", "を", "形態素", "解析", "し", "て", "ください", "。"]
|
43
48
|
```
|
44
49
|
|
45
50
|
MeCab::Light is a lightweight tool.
|
46
51
|
This gem works without the official binding.
|
47
|
-
|
48
|
-
Note that the method's feature is totally different from its original.
|
52
|
+
Note that this supports less methods than those of C API for now.
|
49
53
|
|
50
54
|
## Contributing
|
51
55
|
|
data/Rakefile
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
|
3
3
|
Bundler.setup
|
4
|
-
require 'rspec/core/rake_task'
|
5
4
|
|
6
|
-
desc '
|
7
|
-
|
8
|
-
|
5
|
+
desc 'Run test'
|
6
|
+
|
7
|
+
task :test do
|
9
8
|
end
|
10
9
|
|
11
|
-
task :default => :
|
10
|
+
task :default => :test
|
@@ -4,10 +4,10 @@ if mecab_dir = arg_config('--with-mecab-folder')
|
|
4
4
|
sdk_dir = File.join(mecab_dir, 'sdk')
|
5
5
|
bin_dir = File.join(mecab_dir, 'bin')
|
6
6
|
if find_header('mecab.h', sdk_dir) && find_library('mecab', nil, bin_dir)
|
7
|
-
create_makefile('mecab/light
|
7
|
+
create_makefile('mecab/light')
|
8
8
|
end
|
9
9
|
else
|
10
10
|
if have_header('mecab.h') && have_library('mecab')
|
11
|
-
create_makefile('mecab/light
|
11
|
+
create_makefile('mecab/light')
|
12
12
|
end
|
13
13
|
end
|
data/ext/mecab/light.c
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
3
|
+
#include <mecab.h>
|
4
|
+
|
5
|
+
#define MECAB_LIGHT_MAJOR_VERSION 1
|
6
|
+
#define MECAB_LIGHT_MINOR_VERSION 0
|
7
|
+
#define MECAB_LIGHT_PATCH_VERSION 0
|
8
|
+
|
9
|
+
typedef struct {
|
10
|
+
mecab_model_t* ptr;
|
11
|
+
} Model;
|
12
|
+
|
13
|
+
typedef struct {
|
14
|
+
mecab_t* ptr;
|
15
|
+
} Tagger;
|
16
|
+
|
17
|
+
typedef struct {
|
18
|
+
mecab_lattice_t* ptr;
|
19
|
+
rb_encoding* enc;
|
20
|
+
} Lattice;
|
21
|
+
|
22
|
+
typedef struct {
|
23
|
+
const mecab_node_t* ptr;
|
24
|
+
rb_encoding* enc;
|
25
|
+
} Node;
|
26
|
+
|
27
|
+
typedef struct {
|
28
|
+
const mecab_node_t* bos_node;
|
29
|
+
rb_encoding* enc;
|
30
|
+
} Result;
|
31
|
+
|
32
|
+
static VALUE
|
33
|
+
name_space()
|
34
|
+
{
|
35
|
+
VALUE rb_mMeCab = rb_define_module("MeCab");
|
36
|
+
return rb_define_module_under(rb_mMeCab, "Light");
|
37
|
+
}
|
38
|
+
|
39
|
+
static void
|
40
|
+
model_free(Model* model)
|
41
|
+
{
|
42
|
+
mecab_model_destroy(model->ptr);
|
43
|
+
}
|
44
|
+
|
45
|
+
static VALUE
|
46
|
+
model_alloc(VALUE klass)
|
47
|
+
{
|
48
|
+
Model* model = ALLOC(Model);
|
49
|
+
return Data_Wrap_Struct(klass, 0, model_free, model);
|
50
|
+
}
|
51
|
+
|
52
|
+
static void
|
53
|
+
tagger_free(Tagger* tagger)
|
54
|
+
{
|
55
|
+
mecab_destroy(tagger->ptr);
|
56
|
+
}
|
57
|
+
|
58
|
+
static VALUE
|
59
|
+
tagger_alloc(VALUE klass)
|
60
|
+
{
|
61
|
+
Tagger* tagger = ALLOC(Tagger);
|
62
|
+
return Data_Wrap_Struct(klass, 0, tagger_free, tagger);
|
63
|
+
}
|
64
|
+
|
65
|
+
static void
|
66
|
+
lattice_free(Lattice* lattice)
|
67
|
+
{
|
68
|
+
mecab_lattice_destroy(lattice->ptr);
|
69
|
+
}
|
70
|
+
|
71
|
+
static VALUE
|
72
|
+
lattice_alloc(VALUE klass)
|
73
|
+
{
|
74
|
+
Lattice* lattice = ALLOC(Lattice);
|
75
|
+
return Data_Wrap_Struct(klass, 0, lattice_free, lattice);
|
76
|
+
}
|
77
|
+
|
78
|
+
static VALUE
|
79
|
+
rb_model_initialize(VALUE self, VALUE arg)
|
80
|
+
{
|
81
|
+
Model* model;
|
82
|
+
|
83
|
+
Data_Get_Struct(self, Model, model);
|
84
|
+
model->ptr = mecab_model_new2(RSTRING_PTR(arg));
|
85
|
+
return Qnil;
|
86
|
+
}
|
87
|
+
|
88
|
+
static VALUE
|
89
|
+
rb_tagger_initialize(VALUE self, VALUE arg)
|
90
|
+
{
|
91
|
+
Tagger* tagger;
|
92
|
+
Model* model;
|
93
|
+
VALUE class_of_arg, rb_cModel;
|
94
|
+
|
95
|
+
Data_Get_Struct(self, Tagger, tagger);
|
96
|
+
rb_cModel = rb_define_class_under(name_space(), "Model", rb_cObject);
|
97
|
+
class_of_arg = CLASS_OF(arg);
|
98
|
+
if (class_of_arg == rb_cString) {
|
99
|
+
tagger->ptr = mecab_new2(RSTRING_PTR(arg));
|
100
|
+
} else if (class_of_arg == rb_cModel) {
|
101
|
+
Data_Get_Struct(arg, Model, model);
|
102
|
+
tagger->ptr = mecab_model_new_tagger(model->ptr);
|
103
|
+
} else {
|
104
|
+
rb_raise(rb_eTypeError, "The argument should be String or MeCab::Light::Model");
|
105
|
+
}
|
106
|
+
return Qnil;
|
107
|
+
}
|
108
|
+
|
109
|
+
static VALUE
|
110
|
+
rb_tagger_parse(VALUE self, VALUE arg)
|
111
|
+
{
|
112
|
+
Tagger* tagger;
|
113
|
+
Lattice* lattice;
|
114
|
+
Result* result = ALLOC(Result);
|
115
|
+
VALUE class_of_arg, rb_cLattice, rb_cResult;
|
116
|
+
|
117
|
+
Data_Get_Struct(self, Tagger, tagger);
|
118
|
+
rb_cLattice = rb_define_class_under(name_space(), "Lattice", rb_cObject);
|
119
|
+
class_of_arg = CLASS_OF(arg);
|
120
|
+
if (class_of_arg == rb_cString) {
|
121
|
+
result->bos_node = mecab_sparse_tonode(tagger->ptr, RSTRING_PTR(arg));
|
122
|
+
result->enc = rb_enc_get(arg);
|
123
|
+
} else if (class_of_arg == rb_cLattice) {
|
124
|
+
Data_Get_Struct(arg, Lattice, lattice);
|
125
|
+
mecab_parse_lattice(tagger->ptr, lattice->ptr);
|
126
|
+
result->bos_node = mecab_lattice_get_bos_node(lattice->ptr);
|
127
|
+
result->enc = lattice->enc;
|
128
|
+
} else {
|
129
|
+
rb_raise(rb_eTypeError, "The argument should be String or MeCab::Light::Lattice");
|
130
|
+
}
|
131
|
+
rb_cResult = rb_define_class_under(name_space(), "Result", rb_cObject);
|
132
|
+
return Data_Wrap_Struct(rb_cResult, 0, 0, result);
|
133
|
+
}
|
134
|
+
|
135
|
+
static VALUE
|
136
|
+
rb_lattice_initialize(VALUE self, VALUE rb_model)
|
137
|
+
{
|
138
|
+
Lattice* lattice;
|
139
|
+
Model* model;
|
140
|
+
|
141
|
+
Data_Get_Struct(self, Lattice, lattice);
|
142
|
+
Data_Get_Struct(rb_model, Model, model);
|
143
|
+
lattice->ptr = mecab_model_new_lattice(model->ptr);
|
144
|
+
return Qnil;
|
145
|
+
}
|
146
|
+
|
147
|
+
static VALUE
|
148
|
+
rb_lattice_set_sentence(VALUE self, VALUE str)
|
149
|
+
{
|
150
|
+
Lattice* lattice;
|
151
|
+
|
152
|
+
Data_Get_Struct(self, Lattice, lattice);
|
153
|
+
mecab_lattice_set_sentence(lattice->ptr, RSTRING_PTR(str));
|
154
|
+
lattice->enc = rb_enc_get(str);
|
155
|
+
return str;
|
156
|
+
}
|
157
|
+
|
158
|
+
static VALUE
|
159
|
+
result_enum_length(VALUE self, VALUE args, VALUE eobj)
|
160
|
+
{
|
161
|
+
return rb_funcall(self, rb_intern("count"), 0);
|
162
|
+
}
|
163
|
+
|
164
|
+
static VALUE
|
165
|
+
rb_result_each(VALUE self)
|
166
|
+
{
|
167
|
+
Result* result;
|
168
|
+
Node* node;
|
169
|
+
VALUE rb_cNode;
|
170
|
+
|
171
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, result_enum_length);
|
172
|
+
Data_Get_Struct(self, Result, result);
|
173
|
+
node = ALLOC(Node);
|
174
|
+
node->ptr = result->bos_node->next;
|
175
|
+
node->enc = result->enc;
|
176
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
177
|
+
for (; node->ptr->next; node->ptr = node->ptr->next) {
|
178
|
+
rb_yield(Data_Wrap_Struct(rb_cNode, 0, 0, node));
|
179
|
+
}
|
180
|
+
return self;
|
181
|
+
}
|
182
|
+
|
183
|
+
static VALUE
|
184
|
+
rb_node_get_surface(VALUE self)
|
185
|
+
{
|
186
|
+
Node* node;
|
187
|
+
VALUE surface;
|
188
|
+
|
189
|
+
Data_Get_Struct(self, Node, node);
|
190
|
+
surface = rb_str_new(node->ptr->surface, node->ptr->length);
|
191
|
+
return rb_enc_associate(surface, node->enc);
|
192
|
+
}
|
193
|
+
|
194
|
+
static VALUE
|
195
|
+
rb_node_get_feature(VALUE self)
|
196
|
+
{
|
197
|
+
Node* node;
|
198
|
+
VALUE feature;
|
199
|
+
|
200
|
+
Data_Get_Struct(self, Node, node);
|
201
|
+
feature = rb_str_new2(node->ptr->feature);
|
202
|
+
return rb_enc_associate(feature, node->enc);
|
203
|
+
}
|
204
|
+
|
205
|
+
void
|
206
|
+
Init_light()
|
207
|
+
{
|
208
|
+
VALUE rb_cModel, rb_cLattice, rb_cTagger, rb_cNode, rb_cResult;
|
209
|
+
|
210
|
+
rb_cModel = rb_define_class_under(name_space(), "Model", rb_cObject);
|
211
|
+
rb_cTagger = rb_define_class_under(name_space(), "Tagger", rb_cObject);
|
212
|
+
rb_cLattice = rb_define_class_under(name_space(), "Lattice", rb_cObject);
|
213
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
214
|
+
rb_cResult = rb_define_class_under(name_space(), "Result", rb_cObject);
|
215
|
+
rb_define_alloc_func(rb_cModel, model_alloc);
|
216
|
+
rb_define_alloc_func(rb_cTagger, tagger_alloc);
|
217
|
+
rb_define_alloc_func(rb_cLattice, lattice_alloc);
|
218
|
+
rb_define_private_method(rb_cModel, "initialize", rb_model_initialize, 1);
|
219
|
+
rb_define_private_method(rb_cTagger, "initialize", rb_tagger_initialize, 1);
|
220
|
+
rb_define_private_method(rb_cLattice, "initialize", rb_lattice_initialize, 1);
|
221
|
+
rb_define_method(rb_cTagger, "parse", rb_tagger_parse, 1);
|
222
|
+
rb_define_method(rb_cLattice, "sentence=", rb_lattice_set_sentence, 1);
|
223
|
+
rb_define_method(rb_cResult, "each", rb_result_each, 0);
|
224
|
+
rb_define_method(rb_cNode, "surface", rb_node_get_surface, 0);
|
225
|
+
rb_define_method(rb_cNode, "feature", rb_node_get_feature, 0);
|
226
|
+
rb_include_module(rb_cResult, rb_mEnumerable);
|
227
|
+
}
|
data/mecab-light.gemspec
CHANGED
@@ -1,11 +1,24 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
base_dir = File.dirname(__FILE__)
|
2
|
+
ext_dir = File.join(base_dir, 'ext', 'mecab')
|
3
|
+
|
4
|
+
guess_version = lambda do |ext_dir|
|
5
|
+
version = {}
|
6
|
+
|
7
|
+
File.open(File.join(ext_dir, 'light.c')) do |light_c|
|
8
|
+
light_c.each_line do |line|
|
9
|
+
case line
|
10
|
+
when /\A#define MECAB_LIGHT_([A-Z]+)_VERSION (\d+)/
|
11
|
+
version[$1.intern] = $2
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
[version[:MAJOR], version[:MINOR], version[:PATCH]].join('.')
|
17
|
+
end
|
5
18
|
|
6
19
|
Gem::Specification.new do |gem|
|
7
20
|
gem.name = "mecab-light"
|
8
|
-
gem.version =
|
21
|
+
gem.version = guess_version.call(ext_dir)
|
9
22
|
gem.authors = ["Hajime Wakahara"]
|
10
23
|
gem.email = ["hadzimme@icloud.com"]
|
11
24
|
gem.description = %q{Use a sequence of morphemes as an Enumerable object.}
|
@@ -16,11 +29,9 @@ Gem::Specification.new do |gem|
|
|
16
29
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
30
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
31
|
gem.require_paths = ["lib"]
|
19
|
-
gem.required_ruby_version = '>=
|
20
|
-
gem.extensions << 'ext/mecab/
|
32
|
+
gem.required_ruby_version = '>= 2.0'
|
33
|
+
gem.extensions << 'ext/mecab/extconf.rb'
|
21
34
|
|
22
35
|
gem.add_development_dependency 'rake'
|
23
|
-
gem.add_development_dependency '
|
24
|
-
gem.add_development_dependency 'simplecov'
|
25
|
-
gem.add_development_dependency 'coveralls'
|
36
|
+
gem.add_development_dependency 'test-unit'
|
26
37
|
end
|
metadata
CHANGED
@@ -1,69 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mecab-light
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hajime Wakahara
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: test-unit
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: simplecov
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - '>='
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - '>='
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: coveralls
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - '>='
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - '>='
|
38
|
+
- - ">="
|
67
39
|
- !ruby/object:Gem::Version
|
68
40
|
version: '0'
|
69
41
|
description: Use a sequence of morphemes as an Enumerable object.
|
@@ -71,27 +43,18 @@ email:
|
|
71
43
|
- hadzimme@icloud.com
|
72
44
|
executables: []
|
73
45
|
extensions:
|
74
|
-
- ext/mecab/
|
46
|
+
- ext/mecab/extconf.rb
|
75
47
|
extra_rdoc_files: []
|
76
48
|
files:
|
77
|
-
- .gitignore
|
78
|
-
- .travis.yml
|
49
|
+
- ".gitignore"
|
50
|
+
- ".travis.yml"
|
79
51
|
- Gemfile
|
80
52
|
- LICENSE.txt
|
81
53
|
- README.md
|
82
54
|
- Rakefile
|
83
|
-
- ext/mecab/
|
84
|
-
- ext/mecab/light
|
85
|
-
- lib/mecab/light.rb
|
86
|
-
- lib/mecab/light/morpheme.rb
|
87
|
-
- lib/mecab/light/result.rb
|
88
|
-
- lib/mecab/light/tagger.rb
|
89
|
-
- lib/mecab/light/version.rb
|
55
|
+
- ext/mecab/extconf.rb
|
56
|
+
- ext/mecab/light.c
|
90
57
|
- mecab-light.gemspec
|
91
|
-
- spec/mecab-light-morpheme_spec.rb
|
92
|
-
- spec/mecab-light-result_spec.rb
|
93
|
-
- spec/mecab-light-tagger_spec.rb
|
94
|
-
- spec/spec_helper.rb
|
95
58
|
homepage: https://github.com/hadzimme/mecab-light
|
96
59
|
licenses: []
|
97
60
|
metadata: {}
|
@@ -101,22 +64,18 @@ require_paths:
|
|
101
64
|
- lib
|
102
65
|
required_ruby_version: !ruby/object:Gem::Requirement
|
103
66
|
requirements:
|
104
|
-
- -
|
67
|
+
- - ">="
|
105
68
|
- !ruby/object:Gem::Version
|
106
|
-
version: '
|
69
|
+
version: '2.0'
|
107
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
71
|
requirements:
|
109
|
-
- -
|
72
|
+
- - ">="
|
110
73
|
- !ruby/object:Gem::Version
|
111
74
|
version: '0'
|
112
75
|
requirements: []
|
113
76
|
rubyforge_project:
|
114
|
-
rubygems_version: 2.
|
77
|
+
rubygems_version: 2.3.0
|
115
78
|
signing_key:
|
116
79
|
specification_version: 4
|
117
80
|
summary: An simple interface for MeCab (UNOFFICIAL)
|
118
|
-
test_files:
|
119
|
-
- spec/mecab-light-morpheme_spec.rb
|
120
|
-
- spec/mecab-light-result_spec.rb
|
121
|
-
- spec/mecab-light-tagger_spec.rb
|
122
|
-
- spec/spec_helper.rb
|
81
|
+
test_files: []
|
data/ext/mecab/light/binding.c
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
#include <ruby.h>
|
2
|
-
#include <ruby/encoding.h>
|
3
|
-
#include <mecab.h>
|
4
|
-
|
5
|
-
typedef struct {
|
6
|
-
mecab_t* ptr;
|
7
|
-
} MeCab;
|
8
|
-
|
9
|
-
static VALUE
|
10
|
-
mecab_alloc(VALUE klass)
|
11
|
-
{
|
12
|
-
MeCab* mecab = ALLOC(MeCab);
|
13
|
-
return Data_Wrap_Struct(klass, 0, 0, mecab);
|
14
|
-
}
|
15
|
-
|
16
|
-
static VALUE
|
17
|
-
rb_mecab_initialize(VALUE self, VALUE arg)
|
18
|
-
{
|
19
|
-
MeCab* mecab;
|
20
|
-
|
21
|
-
Data_Get_Struct(self, MeCab, mecab);
|
22
|
-
mecab->ptr = mecab_new2(RSTRING_PTR(arg));
|
23
|
-
return Qnil;
|
24
|
-
}
|
25
|
-
|
26
|
-
static VALUE
|
27
|
-
rb_mecab_parse_to_s(VALUE self, VALUE str)
|
28
|
-
{
|
29
|
-
MeCab* mecab;
|
30
|
-
const char* result;
|
31
|
-
rb_encoding* enc;
|
32
|
-
|
33
|
-
Data_Get_Struct(self, MeCab, mecab);
|
34
|
-
enc = rb_enc_get(str);
|
35
|
-
result = mecab_sparse_tostr(mecab->ptr, RSTRING_PTR(str));
|
36
|
-
return rb_enc_associate(rb_str_new2(result), enc);
|
37
|
-
}
|
38
|
-
|
39
|
-
void
|
40
|
-
Init_binding()
|
41
|
-
{
|
42
|
-
VALUE rb_mMeCab, rb_mLight, rb_cBinding;
|
43
|
-
|
44
|
-
rb_mMeCab = rb_define_module("MeCab");
|
45
|
-
rb_mLight = rb_define_module_under(rb_mMeCab, "Light");
|
46
|
-
rb_cBinding = rb_define_class_under(rb_mLight, "Binding", rb_cObject);
|
47
|
-
rb_define_alloc_func(rb_cBinding, mecab_alloc);
|
48
|
-
rb_define_private_method(rb_cBinding, "initialize", rb_mecab_initialize, 1);
|
49
|
-
rb_define_method(rb_cBinding, "parse_to_s", rb_mecab_parse_to_s, 1);
|
50
|
-
}
|
data/lib/mecab/light/morpheme.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
module MeCab
|
2
|
-
module Light
|
3
|
-
class Morpheme
|
4
|
-
def initialize(line)
|
5
|
-
@to_s = line.chomp
|
6
|
-
@surface, @feature = @to_s.split(/\t/)
|
7
|
-
end
|
8
|
-
|
9
|
-
alias to_s_orig to_s
|
10
|
-
private :to_s_orig
|
11
|
-
attr_reader :surface, :feature, :to_s
|
12
|
-
|
13
|
-
def inspect
|
14
|
-
to_s_orig.sub(/>$/, " #{@to_s}>")
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/mecab/light/result.rb
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
module MeCab
|
2
|
-
module Light
|
3
|
-
class Result
|
4
|
-
include Enumerable
|
5
|
-
|
6
|
-
def initialize(parsed)
|
7
|
-
@to_s = parsed
|
8
|
-
@morphemes = []
|
9
|
-
|
10
|
-
parsed.each_line do |line|
|
11
|
-
@morphemes << Morpheme.new(line)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def to_one_line
|
16
|
-
@morphemes.map do |morpheme|
|
17
|
-
morpheme.surface
|
18
|
-
end.join(' ')
|
19
|
-
end
|
20
|
-
|
21
|
-
alias to_s_orig to_s
|
22
|
-
private :to_one_line, :to_s_orig
|
23
|
-
attr_reader :to_s
|
24
|
-
|
25
|
-
def inspect
|
26
|
-
to_s_orig.sub(/>$/, " #{to_one_line}>")
|
27
|
-
end
|
28
|
-
|
29
|
-
def each(&block)
|
30
|
-
if block_given?
|
31
|
-
@morphemes.each(&block)
|
32
|
-
self
|
33
|
-
else
|
34
|
-
self.to_enum { @morphemes.size }
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def [](nth)
|
39
|
-
@morphemes[nth]
|
40
|
-
end
|
41
|
-
|
42
|
-
alias at []
|
43
|
-
|
44
|
-
def size
|
45
|
-
@morphemes.size
|
46
|
-
end
|
47
|
-
|
48
|
-
alias length size
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
data/lib/mecab/light/tagger.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
module MeCab
|
2
|
-
module Light
|
3
|
-
class Tagger
|
4
|
-
def initialize
|
5
|
-
@mecab = Binding.new('')
|
6
|
-
end
|
7
|
-
|
8
|
-
def parse(string)
|
9
|
-
Result.new(parse_to_s(string))
|
10
|
-
end
|
11
|
-
|
12
|
-
private
|
13
|
-
def parse_to_s(string)
|
14
|
-
@mecab.parse_to_s(string).sub(/EOS\n$/, '')
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/mecab/light/version.rb
DELETED
data/lib/mecab/light.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe MeCab::Light::Morpheme do
|
4
|
-
subject do
|
5
|
-
MeCab::Light::Morpheme.new(line)
|
6
|
-
end
|
7
|
-
|
8
|
-
context 'initialized with "surface\tfeature\n"' do
|
9
|
-
let :line do
|
10
|
-
"surface\tfeature\n"
|
11
|
-
end
|
12
|
-
|
13
|
-
specify do
|
14
|
-
expect(subject).to respond_to(:surface).with(0).arguments
|
15
|
-
end
|
16
|
-
|
17
|
-
specify do
|
18
|
-
expect(subject).to respond_to(:feature).with(0).arguments
|
19
|
-
end
|
20
|
-
|
21
|
-
describe :surface do
|
22
|
-
let :surface do
|
23
|
-
subject.surface
|
24
|
-
end
|
25
|
-
|
26
|
-
specify do
|
27
|
-
expect(surface).to eq('surface')
|
28
|
-
end
|
29
|
-
|
30
|
-
describe :encoding do
|
31
|
-
let :encoding do
|
32
|
-
surface.encoding
|
33
|
-
end
|
34
|
-
|
35
|
-
specify do
|
36
|
-
expect(encoding).to eq(Encoding::UTF_8)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe :feature do
|
42
|
-
let :feature do
|
43
|
-
subject.feature
|
44
|
-
end
|
45
|
-
|
46
|
-
specify do
|
47
|
-
expect(feature).to eq('feature')
|
48
|
-
end
|
49
|
-
|
50
|
-
describe :encoding do
|
51
|
-
let :encoding do
|
52
|
-
feature.encoding
|
53
|
-
end
|
54
|
-
|
55
|
-
specify do
|
56
|
-
expect(encoding).to eq(Encoding::UTF_8)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
describe :to_s do
|
62
|
-
let :to_s do
|
63
|
-
subject.to_s
|
64
|
-
end
|
65
|
-
|
66
|
-
specify do
|
67
|
-
expect(to_s).to eq("surface\tfeature")
|
68
|
-
end
|
69
|
-
|
70
|
-
describe :encoding do
|
71
|
-
let :encoding do
|
72
|
-
to_s.encoding
|
73
|
-
end
|
74
|
-
|
75
|
-
specify do
|
76
|
-
expect(encoding).to eq(Encoding::UTF_8)
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
describe :inspect do
|
82
|
-
let :inspect do
|
83
|
-
subject.inspect
|
84
|
-
end
|
85
|
-
|
86
|
-
specify do
|
87
|
-
pattern = /^#<MeCab::Light::Morpheme:\w+ surface\tfeature>$/
|
88
|
-
expect(inspect).to match(pattern)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
@@ -1,147 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe MeCab::Light::Result do
|
4
|
-
before do
|
5
|
-
MeCab::Light::Morpheme.stub(:new).and_return(morpheme)
|
6
|
-
end
|
7
|
-
|
8
|
-
let :morpheme do
|
9
|
-
double(MeCab::Light::Morpheme,
|
10
|
-
surface: 'surface',
|
11
|
-
feature: 'feature')
|
12
|
-
end
|
13
|
-
|
14
|
-
subject do
|
15
|
-
MeCab::Light::Result.new(parsed)
|
16
|
-
end
|
17
|
-
|
18
|
-
context 'initialized with "surface\tfeature\n"' do
|
19
|
-
let :parsed do
|
20
|
-
"surface\tfeature\n"
|
21
|
-
end
|
22
|
-
|
23
|
-
specify do
|
24
|
-
expect(subject).to respond_to(:each).with(0).arguments
|
25
|
-
end
|
26
|
-
|
27
|
-
specify do
|
28
|
-
expect(subject).to be_an(Enumerable)
|
29
|
-
end
|
30
|
-
|
31
|
-
describe :each do
|
32
|
-
let :each do
|
33
|
-
subject.each(&block)
|
34
|
-
end
|
35
|
-
|
36
|
-
context 'with block' do
|
37
|
-
let :block do
|
38
|
-
lambda { |morpheme| }
|
39
|
-
end
|
40
|
-
|
41
|
-
it 'should return self' do
|
42
|
-
expect(each).to eq(subject)
|
43
|
-
end
|
44
|
-
|
45
|
-
specify do
|
46
|
-
expect { |b| subject.each(&b) }.to yield_control
|
47
|
-
end
|
48
|
-
|
49
|
-
it 'should yield with args(MeCab::Light::Morpheme)' do
|
50
|
-
expect { |b| subject.each(&b) }.to yield_with_args(morpheme)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
context 'without block' do
|
55
|
-
let :block do
|
56
|
-
nil
|
57
|
-
end
|
58
|
-
|
59
|
-
specify do
|
60
|
-
expect(each).to be_an_instance_of(Enumerator)
|
61
|
-
end
|
62
|
-
|
63
|
-
describe :size do
|
64
|
-
let :size do
|
65
|
-
each.size
|
66
|
-
end
|
67
|
-
|
68
|
-
specify do
|
69
|
-
expect(size).to eq(1)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
describe :count do
|
76
|
-
let :count do
|
77
|
-
subject.count
|
78
|
-
end
|
79
|
-
|
80
|
-
specify do
|
81
|
-
expect(count).to eq(1)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
describe :size do
|
86
|
-
let :size do
|
87
|
-
subject.size
|
88
|
-
end
|
89
|
-
|
90
|
-
specify do
|
91
|
-
expect(size).to eq(1)
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
describe :length do
|
96
|
-
let :length do
|
97
|
-
subject.length
|
98
|
-
end
|
99
|
-
|
100
|
-
specify do
|
101
|
-
expect(length).to eq(1)
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
describe :[] do
|
106
|
-
let :at_literal do
|
107
|
-
subject[nth]
|
108
|
-
end
|
109
|
-
|
110
|
-
context 'with 0' do
|
111
|
-
let :nth do
|
112
|
-
0
|
113
|
-
end
|
114
|
-
|
115
|
-
it 'should be an instance of Morpheme' do
|
116
|
-
expect(at_literal).to eq(morpheme)
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
describe :at do
|
122
|
-
let :at do
|
123
|
-
subject.at(nth)
|
124
|
-
end
|
125
|
-
|
126
|
-
context 'with 0' do
|
127
|
-
let :nth do
|
128
|
-
0
|
129
|
-
end
|
130
|
-
|
131
|
-
it 'should be an instance of Morpheme' do
|
132
|
-
expect(at).to eq(morpheme)
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
describe :inspect do
|
138
|
-
let :inspect do
|
139
|
-
subject.inspect
|
140
|
-
end
|
141
|
-
|
142
|
-
specify do
|
143
|
-
expect(inspect).to match(/^#<MeCab::Light::Result:\w+ surface>$/)
|
144
|
-
end
|
145
|
-
end
|
146
|
-
end
|
147
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe MeCab::Light::Tagger do
|
4
|
-
before do
|
5
|
-
MeCab::Light::Binding.stub(:new).and_return(binding)
|
6
|
-
MeCab::Light::Result.stub(:new).and_return(result)
|
7
|
-
end
|
8
|
-
|
9
|
-
let :binding do
|
10
|
-
double(MeCab::Light::Binding,
|
11
|
-
parse_to_s: "surface\tfeature\nEOS\n")
|
12
|
-
end
|
13
|
-
|
14
|
-
let :result do
|
15
|
-
double(MeCab::Light::Result)
|
16
|
-
end
|
17
|
-
|
18
|
-
specify do
|
19
|
-
expect(subject).to respond_to(:parse).with(1).argument
|
20
|
-
end
|
21
|
-
|
22
|
-
describe :parse do
|
23
|
-
let :parse do
|
24
|
-
subject.parse(string)
|
25
|
-
end
|
26
|
-
|
27
|
-
context 'with "surface"' do
|
28
|
-
let :string do
|
29
|
-
'surface'
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'should be an instance of MeCab::Light::Result' do
|
33
|
-
expect(parse).to eq(result)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
data/spec/spec_helper.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
require 'coveralls'
|
3
|
-
|
4
|
-
Coveralls.wear!
|
5
|
-
formatters = [
|
6
|
-
SimpleCov::Formatter::HTMLFormatter,
|
7
|
-
Coveralls::SimpleCov::Formatter,
|
8
|
-
]
|
9
|
-
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[*formatters]
|
10
|
-
|
11
|
-
SimpleCov.start do
|
12
|
-
add_filter 'spec'
|
13
|
-
end
|
14
|
-
|
15
|
-
$:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
|
-
require 'mecab/light/morpheme'
|
17
|
-
require 'mecab/light/result'
|
18
|
-
require 'mecab/light/tagger'
|
19
|
-
|
20
|
-
|
21
|
-
class MeCab::Light::Binding
|
22
|
-
end
|