mecab-heavy 0.996.2.1 → 0.996.3dev
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/mecab/light.c +352 -0
- data/ext/mecab/mecab_wrap.cpp +3 -0
- data/lib/mecab/heavy.rb +0 -1
- data/lib/mecab/heavy/modular.rb +1 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cee5887cd3d71167c6da6f2b8204d3c0699e5b9d
|
4
|
+
data.tar.gz: ed5c7d5a4cbf5a9c4987fec36fb4764e648974df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 09b953f59399c26e9288daccd0254df4d65976f680edebe61bcde3893427f0595b5026925e318dbe8edf4b3004e95afecf95a285d96d1888602a0bdd79f65c92
|
7
|
+
data.tar.gz: ef404615d73e881a4ee6475b5b03d0ef73ba3297259cff1b06bbfb69dd1c28e7e9e824e55f19885651b55c25c2a0904457e62cb1dcdc3ff764beeab6fdaddc93
|
data/ext/mecab/light.c
ADDED
@@ -0,0 +1,352 @@
|
|
1
|
+
#ifdef __cplusplus
|
2
|
+
extern "C" {
|
3
|
+
#endif
|
4
|
+
|
5
|
+
#include <ruby.h>
|
6
|
+
#include <ruby/encoding.h>
|
7
|
+
#include <mecab.h>
|
8
|
+
|
9
|
+
#define MECAB_LIGHT_MAJOR_VERSION 1
|
10
|
+
#define MECAB_LIGHT_MINOR_VERSION 0
|
11
|
+
#define MECAB_LIGHT_PATCH_VERSION 1
|
12
|
+
|
13
|
+
typedef struct {
|
14
|
+
mecab_model_t* ptr;
|
15
|
+
} Model;
|
16
|
+
|
17
|
+
typedef struct {
|
18
|
+
mecab_t* ptr;
|
19
|
+
} Tagger;
|
20
|
+
|
21
|
+
typedef struct {
|
22
|
+
mecab_lattice_t* ptr;
|
23
|
+
rb_encoding* enc;
|
24
|
+
} Lattice;
|
25
|
+
|
26
|
+
typedef struct {
|
27
|
+
const mecab_node_t* ptr;
|
28
|
+
rb_encoding* enc;
|
29
|
+
} Node;
|
30
|
+
|
31
|
+
typedef struct {
|
32
|
+
const mecab_node_t* bos_node;
|
33
|
+
rb_encoding* enc;
|
34
|
+
} Result;
|
35
|
+
|
36
|
+
static VALUE
|
37
|
+
name_space()
|
38
|
+
{
|
39
|
+
VALUE rb_mMeCab = rb_define_module("MeCab");
|
40
|
+
return rb_define_module_under(rb_mMeCab, "Light");
|
41
|
+
}
|
42
|
+
|
43
|
+
static void
|
44
|
+
free_model(Model* model)
|
45
|
+
{
|
46
|
+
mecab_model_destroy(model->ptr);
|
47
|
+
free(model);
|
48
|
+
}
|
49
|
+
|
50
|
+
static VALUE
|
51
|
+
model_alloc(VALUE klass)
|
52
|
+
{
|
53
|
+
Model* model = ALLOC(Model);
|
54
|
+
return Data_Wrap_Struct(klass, 0, free_model, model);
|
55
|
+
}
|
56
|
+
|
57
|
+
static void
|
58
|
+
free_tagger(Tagger* tagger)
|
59
|
+
{
|
60
|
+
mecab_destroy(tagger->ptr);
|
61
|
+
free(tagger);
|
62
|
+
}
|
63
|
+
|
64
|
+
static VALUE
|
65
|
+
tagger_alloc(VALUE klass)
|
66
|
+
{
|
67
|
+
Tagger* tagger = ALLOC(Tagger);
|
68
|
+
return Data_Wrap_Struct(klass, 0, free_tagger, tagger);
|
69
|
+
}
|
70
|
+
|
71
|
+
static void
|
72
|
+
free_lattice(Lattice* lattice)
|
73
|
+
{
|
74
|
+
mecab_lattice_destroy(lattice->ptr);
|
75
|
+
free(lattice);
|
76
|
+
}
|
77
|
+
|
78
|
+
static VALUE
|
79
|
+
lattice_alloc(VALUE klass)
|
80
|
+
{
|
81
|
+
Lattice* lattice = ALLOC(Lattice);
|
82
|
+
return Data_Wrap_Struct(klass, 0, free_lattice, lattice);
|
83
|
+
}
|
84
|
+
|
85
|
+
static void
|
86
|
+
free_node(Node* node)
|
87
|
+
{
|
88
|
+
free(node);
|
89
|
+
}
|
90
|
+
|
91
|
+
static void
|
92
|
+
free_result(Result* result)
|
93
|
+
{
|
94
|
+
free(result);
|
95
|
+
}
|
96
|
+
|
97
|
+
static VALUE
|
98
|
+
rb_model_initialize(VALUE self, VALUE arg)
|
99
|
+
{
|
100
|
+
Model* model;
|
101
|
+
|
102
|
+
Data_Get_Struct(self, Model, model);
|
103
|
+
model->ptr = mecab_model_new2(RSTRING_PTR(arg));
|
104
|
+
return Qnil;
|
105
|
+
}
|
106
|
+
|
107
|
+
static VALUE
|
108
|
+
rb_tagger_initialize(VALUE self, VALUE arg)
|
109
|
+
{
|
110
|
+
Tagger* tagger;
|
111
|
+
Model* model;
|
112
|
+
VALUE class_of_arg, rb_cModel;
|
113
|
+
|
114
|
+
Data_Get_Struct(self, Tagger, tagger);
|
115
|
+
rb_cModel = rb_define_class_under(name_space(), "Model", rb_cObject);
|
116
|
+
class_of_arg = CLASS_OF(arg);
|
117
|
+
if (class_of_arg == rb_cString) {
|
118
|
+
tagger->ptr = mecab_new2(RSTRING_PTR(arg));
|
119
|
+
} else if (class_of_arg == rb_cModel) {
|
120
|
+
Data_Get_Struct(arg, Model, model);
|
121
|
+
tagger->ptr = mecab_model_new_tagger(model->ptr);
|
122
|
+
} else {
|
123
|
+
rb_raise(rb_eTypeError, "The argument should be String or MeCab::Light::Model");
|
124
|
+
}
|
125
|
+
return Qnil;
|
126
|
+
}
|
127
|
+
|
128
|
+
static VALUE
|
129
|
+
rb_tagger_parse(VALUE self, VALUE arg)
|
130
|
+
{
|
131
|
+
Tagger* tagger;
|
132
|
+
Lattice* lattice;
|
133
|
+
Result* result = ALLOC(Result);
|
134
|
+
VALUE class_of_arg, rb_cLattice, rb_cResult;
|
135
|
+
|
136
|
+
Data_Get_Struct(self, Tagger, tagger);
|
137
|
+
rb_cLattice = rb_define_class_under(name_space(), "Lattice", rb_cObject);
|
138
|
+
class_of_arg = CLASS_OF(arg);
|
139
|
+
if (class_of_arg == rb_cString) {
|
140
|
+
result->bos_node = mecab_sparse_tonode(tagger->ptr, RSTRING_PTR(arg));
|
141
|
+
result->enc = rb_enc_get(arg);
|
142
|
+
} else if (class_of_arg == rb_cLattice) {
|
143
|
+
Data_Get_Struct(arg, Lattice, lattice);
|
144
|
+
mecab_parse_lattice(tagger->ptr, lattice->ptr);
|
145
|
+
result->bos_node = mecab_lattice_get_bos_node(lattice->ptr);
|
146
|
+
result->enc = lattice->enc;
|
147
|
+
} else {
|
148
|
+
rb_raise(rb_eTypeError, "The argument should be String or MeCab::Light::Lattice");
|
149
|
+
}
|
150
|
+
rb_cResult = rb_define_class_under(name_space(), "Result", rb_cObject);
|
151
|
+
return Data_Wrap_Struct(rb_cResult, 0, free_result, result);
|
152
|
+
}
|
153
|
+
|
154
|
+
static VALUE
|
155
|
+
rb_lattice_initialize(VALUE self, VALUE rb_model)
|
156
|
+
{
|
157
|
+
Lattice* lattice;
|
158
|
+
Model* model;
|
159
|
+
|
160
|
+
Data_Get_Struct(self, Lattice, lattice);
|
161
|
+
Data_Get_Struct(rb_model, Model, model);
|
162
|
+
lattice->ptr = mecab_model_new_lattice(model->ptr);
|
163
|
+
return Qnil;
|
164
|
+
}
|
165
|
+
|
166
|
+
static VALUE
|
167
|
+
rb_lattice_clear(VALUE self)
|
168
|
+
{
|
169
|
+
Lattice* lattice;
|
170
|
+
|
171
|
+
Data_Get_Struct(self, Lattice, lattice);
|
172
|
+
mecab_lattice_clear(lattice->ptr);
|
173
|
+
return Qnil;
|
174
|
+
}
|
175
|
+
|
176
|
+
static VALUE
|
177
|
+
rb_lattice_available_p(VALUE self)
|
178
|
+
{
|
179
|
+
Lattice* lattice;
|
180
|
+
int is_available;
|
181
|
+
|
182
|
+
Data_Get_Struct(self, Lattice, lattice);
|
183
|
+
is_available = mecab_lattice_is_available(lattice->ptr);
|
184
|
+
if (is_available == 0) {
|
185
|
+
return Qfalse;
|
186
|
+
} else {
|
187
|
+
return Qtrue;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
|
191
|
+
static VALUE
|
192
|
+
rb_lattice_get_bos_node(VALUE self)
|
193
|
+
{
|
194
|
+
Lattice* lattice;
|
195
|
+
Node* node = ALLOC(Node);
|
196
|
+
VALUE rb_cNode;
|
197
|
+
|
198
|
+
Data_Get_Struct(self, Lattice, lattice);
|
199
|
+
node->ptr = mecab_lattice_get_bos_node(lattice->ptr);
|
200
|
+
node->enc = lattice->enc;
|
201
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
202
|
+
return Data_Wrap_Struct(rb_cNode, 0, free_node, node);
|
203
|
+
}
|
204
|
+
|
205
|
+
static VALUE
|
206
|
+
rb_lattice_get_sentence(VALUE self)
|
207
|
+
{
|
208
|
+
Lattice* lattice;
|
209
|
+
const char* sentence;
|
210
|
+
|
211
|
+
Data_Get_Struct(self, Lattice, lattice);
|
212
|
+
sentence = mecab_lattice_get_sentence(lattice->ptr);
|
213
|
+
if (sentence == NULL) {
|
214
|
+
return Qnil;
|
215
|
+
} else {
|
216
|
+
return rb_enc_associate(rb_str_new2(sentence), lattice->enc);
|
217
|
+
}
|
218
|
+
}
|
219
|
+
|
220
|
+
static VALUE
|
221
|
+
rb_lattice_set_sentence(VALUE self, VALUE str)
|
222
|
+
{
|
223
|
+
Lattice* lattice;
|
224
|
+
|
225
|
+
Data_Get_Struct(self, Lattice, lattice);
|
226
|
+
mecab_lattice_set_sentence(lattice->ptr, RSTRING_PTR(str));
|
227
|
+
lattice->enc = rb_enc_get(str);
|
228
|
+
return str;
|
229
|
+
}
|
230
|
+
|
231
|
+
static VALUE
|
232
|
+
result_enum_length(VALUE self, VALUE args, VALUE eobj)
|
233
|
+
{
|
234
|
+
return rb_funcall(self, rb_intern("count"), 0);
|
235
|
+
}
|
236
|
+
|
237
|
+
static VALUE
|
238
|
+
rb_result_each(VALUE self)
|
239
|
+
{
|
240
|
+
Result* result;
|
241
|
+
Node* node;
|
242
|
+
mecab_node_t* m_node;
|
243
|
+
VALUE rb_cNode;
|
244
|
+
|
245
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, result_enum_length);
|
246
|
+
Data_Get_Struct(self, Result, result);
|
247
|
+
m_node = result->bos_node->next;
|
248
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
249
|
+
for (; m_node->next; m_node = m_node->next) {
|
250
|
+
node = ALLOC(Node);
|
251
|
+
node->ptr = m_node;
|
252
|
+
node->enc = result->enc;
|
253
|
+
rb_yield(Data_Wrap_Struct(rb_cNode, 0, free_node, node));
|
254
|
+
}
|
255
|
+
return self;
|
256
|
+
}
|
257
|
+
|
258
|
+
static VALUE
|
259
|
+
rb_node_prev(VALUE self)
|
260
|
+
{
|
261
|
+
Node* node;
|
262
|
+
Node* prev_node = ALLOC(Node);
|
263
|
+
mecab_node_t* m_node;
|
264
|
+
VALUE rb_cNode;
|
265
|
+
|
266
|
+
Data_Get_Struct(self, Node, node);
|
267
|
+
m_node = node->ptr->prev;
|
268
|
+
if (m_node == NULL) {
|
269
|
+
return Qnil;
|
270
|
+
} else {
|
271
|
+
prev_node->ptr = m_node;
|
272
|
+
prev_node->enc = node->enc;
|
273
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
274
|
+
return Data_Wrap_Struct(rb_cNode, 0, free_node, prev_node);
|
275
|
+
}
|
276
|
+
}
|
277
|
+
|
278
|
+
static VALUE
|
279
|
+
rb_node_next(VALUE self)
|
280
|
+
{
|
281
|
+
Node* node;
|
282
|
+
Node* next_node = ALLOC(Node);
|
283
|
+
mecab_node_t* m_node;
|
284
|
+
VALUE rb_cNode;
|
285
|
+
|
286
|
+
Data_Get_Struct(self, Node, node);
|
287
|
+
m_node = node->ptr->next;
|
288
|
+
if (m_node == NULL) {
|
289
|
+
return Qnil;
|
290
|
+
} else {
|
291
|
+
next_node->ptr = m_node;
|
292
|
+
next_node->enc = node->enc;
|
293
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
294
|
+
return Data_Wrap_Struct(rb_cNode, 0, free_node, next_node);
|
295
|
+
}
|
296
|
+
}
|
297
|
+
|
298
|
+
static VALUE
|
299
|
+
rb_node_get_surface(VALUE self)
|
300
|
+
{
|
301
|
+
Node* node;
|
302
|
+
VALUE surface;
|
303
|
+
|
304
|
+
Data_Get_Struct(self, Node, node);
|
305
|
+
surface = rb_str_new(node->ptr->surface, node->ptr->length);
|
306
|
+
return rb_enc_associate(surface, node->enc);
|
307
|
+
}
|
308
|
+
|
309
|
+
static VALUE
|
310
|
+
rb_node_get_feature(VALUE self)
|
311
|
+
{
|
312
|
+
Node* node;
|
313
|
+
VALUE feature;
|
314
|
+
|
315
|
+
Data_Get_Struct(self, Node, node);
|
316
|
+
feature = rb_str_new2(node->ptr->feature);
|
317
|
+
return rb_enc_associate(feature, node->enc);
|
318
|
+
}
|
319
|
+
|
320
|
+
void
|
321
|
+
Init_light()
|
322
|
+
{
|
323
|
+
VALUE rb_cModel, rb_cLattice, rb_cTagger, rb_cNode, rb_cResult;
|
324
|
+
|
325
|
+
rb_cModel = rb_define_class_under(name_space(), "Model", rb_cObject);
|
326
|
+
rb_cTagger = rb_define_class_under(name_space(), "Tagger", rb_cObject);
|
327
|
+
rb_cLattice = rb_define_class_under(name_space(), "Lattice", rb_cObject);
|
328
|
+
rb_cNode = rb_define_class_under(name_space(), "Node", rb_cObject);
|
329
|
+
rb_cResult = rb_define_class_under(name_space(), "Result", rb_cObject);
|
330
|
+
rb_define_alloc_func(rb_cModel, model_alloc);
|
331
|
+
rb_define_alloc_func(rb_cTagger, tagger_alloc);
|
332
|
+
rb_define_alloc_func(rb_cLattice, lattice_alloc);
|
333
|
+
rb_define_private_method(rb_cModel, "initialize", rb_model_initialize, 1);
|
334
|
+
rb_define_private_method(rb_cTagger, "initialize", rb_tagger_initialize, 1);
|
335
|
+
rb_define_private_method(rb_cLattice, "initialize", rb_lattice_initialize, 1);
|
336
|
+
rb_define_method(rb_cTagger, "parse", rb_tagger_parse, 1);
|
337
|
+
rb_define_method(rb_cLattice, "clear", rb_lattice_clear, 0);
|
338
|
+
rb_define_method(rb_cLattice, "available?", rb_lattice_available_p, 0);
|
339
|
+
rb_define_method(rb_cLattice, "bos_node", rb_lattice_get_bos_node, 0);
|
340
|
+
rb_define_method(rb_cLattice, "sentence", rb_lattice_get_sentence, 0);
|
341
|
+
rb_define_method(rb_cLattice, "sentence=", rb_lattice_set_sentence, 1);
|
342
|
+
rb_define_method(rb_cResult, "each", rb_result_each, 0);
|
343
|
+
rb_define_method(rb_cNode, "prev", rb_node_prev, 0);
|
344
|
+
rb_define_method(rb_cNode, "next", rb_node_next, 0);
|
345
|
+
rb_define_method(rb_cNode, "surface", rb_node_get_surface, 0);
|
346
|
+
rb_define_method(rb_cNode, "feature", rb_node_get_feature, 0);
|
347
|
+
rb_include_module(rb_cResult, rb_mEnumerable);
|
348
|
+
}
|
349
|
+
|
350
|
+
#ifdef __cplusplus
|
351
|
+
}
|
352
|
+
#endif
|
data/ext/mecab/mecab_wrap.cpp
CHANGED
@@ -432,6 +432,7 @@ SWIGINTERNINLINE int SWIG_CheckState(int r) {
|
|
432
432
|
#ifdef __cplusplus
|
433
433
|
extern "C" {
|
434
434
|
#endif
|
435
|
+
void Init_light(void);
|
435
436
|
|
436
437
|
typedef void *(*swig_converter_func)(void *, int *);
|
437
438
|
typedef struct swig_type_info *(*swig_dycast_func)(void **);
|
@@ -6836,5 +6837,7 @@ SWIGEXPORT void Init_mecab(void) {
|
|
6836
6837
|
SwigClassTagger.destroy = (void (*)(void *)) free_MeCab_Tagger;
|
6837
6838
|
SwigClassTagger.trackObjects = 0;
|
6838
6839
|
rb_define_const(mMeCab, "VERSION", SWIG_FromCharPtr("0.996"));
|
6840
|
+
|
6841
|
+
Init_light();
|
6839
6842
|
}
|
6840
6843
|
|
data/lib/mecab/heavy.rb
CHANGED
data/lib/mecab/heavy/modular.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mecab-heavy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.996.2.1
|
4
|
+
version: 0.996.3dev
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tadashi Saito
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mini_portile
|
@@ -63,6 +63,7 @@ extra_rdoc_files: []
|
|
63
63
|
files:
|
64
64
|
- examples/test.rb
|
65
65
|
- ext/mecab/extconf.rb
|
66
|
+
- ext/mecab/light.c
|
66
67
|
- ext/mecab/mecab_wrap.cpp
|
67
68
|
- ext/mecab/parallel_make.rb
|
68
69
|
- ext/mecab/patch/prefix.patch
|
@@ -85,9 +86,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
86
|
version: '0'
|
86
87
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
88
|
requirements:
|
88
|
-
- - "
|
89
|
+
- - ">"
|
89
90
|
- !ruby/object:Gem::Version
|
90
|
-
version:
|
91
|
+
version: 1.3.1
|
91
92
|
requirements: []
|
92
93
|
rubyforge_project:
|
93
94
|
rubygems_version: 2.3.0
|