mmapscanner 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +6 -1
- data/ext/mmapscanner.c +124 -45
- data/spec/mmapscanner_spec.rb +57 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -43,13 +43,18 @@ Usage
|
|
43
43
|
* size, length は mmap(2) したサイズを返します。
|
44
44
|
* to_s は mmap(2) した領域を String で返します。Encoding は常に ASCII-8BIT です。
|
45
45
|
* slice は mmap(2) した領域の一部を新たな MmapScanner オブジェクトで返します。
|
46
|
-
* scan
|
46
|
+
* scan はポインタ位置で正規表現との一致を試みます。一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
|
47
|
+
* scan_until は scan と同じですが、現在のポインタの位置以降で一致を試みます。
|
47
48
|
* check は scan と同じですが、ポインタを進めません。
|
48
49
|
* skip は scan と同じですが、一致したバイト数を返します。
|
49
50
|
* match? は check と同じですが、一致したバイト数を返します。
|
50
51
|
* peek は指定したバイト数分のデータを返します。ポインタは進みません。
|
51
52
|
* eos? はポインタが末尾に達していると true を返します。
|
52
53
|
* rest はポインタ以降のデータを返します。
|
54
|
+
* matched は正規表現に一致した部分を MmapScanner オブジェクトで返します。
|
55
|
+
* matched(n) は正規表現の n番目の括弧に一致した部分を MmapScanner オブジェクトで返します。
|
56
|
+
* pos は現在のポインタの位置を返します。
|
57
|
+
* pos= でポインタ位置を変更することができます。
|
53
58
|
|
54
59
|
Copyright
|
55
60
|
---------
|
data/ext/mmapscanner.c
CHANGED
@@ -13,6 +13,15 @@ typedef struct {
|
|
13
13
|
size_t size;
|
14
14
|
} mmap_data_t;
|
15
15
|
|
16
|
+
typedef struct {
|
17
|
+
size_t offset;
|
18
|
+
size_t size;
|
19
|
+
size_t pos;
|
20
|
+
int matched;
|
21
|
+
size_t matched_pos;
|
22
|
+
struct re_registers regs;
|
23
|
+
} mmapscanner_t;
|
24
|
+
|
16
25
|
static void mmap_free(mmap_data_t *data)
|
17
26
|
{
|
18
27
|
if (data->ptr)
|
@@ -33,12 +42,31 @@ static VALUE create_mmap_object(int fd, size_t offset, size_t size)
|
|
33
42
|
return Data_Wrap_Struct(cMmap, 0, mmap_free, data);
|
34
43
|
}
|
35
44
|
|
45
|
+
static void mmapscanner_free(mmapscanner_t *ms)
|
46
|
+
{
|
47
|
+
onig_region_free(&ms->regs, 0);
|
48
|
+
free(ms);
|
49
|
+
}
|
50
|
+
|
51
|
+
VALUE allocate(VALUE klass)
|
52
|
+
{
|
53
|
+
mmapscanner_t *ms;
|
54
|
+
ms = malloc(sizeof *ms);
|
55
|
+
ms->offset = 0;
|
56
|
+
ms->size = 0;
|
57
|
+
ms->pos = 0;
|
58
|
+
ms->matched_pos = 0;
|
59
|
+
onig_region_init(&ms->regs);
|
60
|
+
return Data_Wrap_Struct(klass, 0, mmapscanner_free, ms);
|
61
|
+
}
|
62
|
+
|
36
63
|
static VALUE initialize(int argc, VALUE *argv, VALUE obj)
|
37
64
|
{
|
38
65
|
VALUE src, voffset, vsize;
|
39
66
|
size_t offset, size;
|
40
67
|
size_t src_offset, src_size;
|
41
68
|
VALUE src_data;
|
69
|
+
mmapscanner_t *ms;
|
42
70
|
|
43
71
|
rb_scan_args(argc, argv, "12", &src, &voffset, &vsize);
|
44
72
|
if (voffset != Qnil && NUM2LL(voffset) < 0)
|
@@ -47,8 +75,9 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
|
|
47
75
|
rb_raise(rb_eRangeError, "length out of range: %lld", NUM2LL(vsize));
|
48
76
|
offset = voffset == Qnil ? 0 : NUM2SIZET(voffset);
|
49
77
|
if (rb_obj_class(src) == cMmapScanner) {
|
50
|
-
|
51
|
-
|
78
|
+
Data_Get_Struct(src, mmapscanner_t, ms);
|
79
|
+
src_offset = ms->offset;
|
80
|
+
src_size = ms->size;
|
52
81
|
src_data = rb_iv_get(src, "data");
|
53
82
|
} else if (TYPE(src) == T_FILE) {
|
54
83
|
int fd;
|
@@ -68,27 +97,35 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
|
|
68
97
|
} else {
|
69
98
|
rb_raise(rb_eTypeError, "wrong argument type %s (expected File/String/MmapScanner)", rb_obj_classname(src));
|
70
99
|
}
|
71
|
-
if (offset
|
72
|
-
rb_raise(rb_eRangeError, "length out of range: %zu
|
100
|
+
if (offset > src_size)
|
101
|
+
rb_raise(rb_eRangeError, "length out of range: %zu > %zu", offset, src_size);
|
73
102
|
size = vsize == Qnil ? src_size - offset : NUM2SIZET(vsize);
|
74
103
|
if (size > src_size - offset)
|
75
104
|
size = src_size - offset;
|
76
|
-
|
77
|
-
|
105
|
+
|
106
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
107
|
+
ms->offset = src_offset + offset;
|
108
|
+
ms->size = size;
|
109
|
+
ms->pos = 0;
|
110
|
+
ms->matched = 0;
|
111
|
+
ms->matched_pos = 0;
|
78
112
|
rb_iv_set(obj, "data", src_data);
|
79
|
-
rb_iv_set(obj, "pos", INT2NUM(0));
|
80
113
|
return Qnil;
|
81
114
|
}
|
82
115
|
|
83
116
|
static VALUE size(VALUE obj)
|
84
117
|
{
|
85
|
-
|
118
|
+
mmapscanner_t *ms;
|
119
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
120
|
+
return SIZET2NUM(ms->size);
|
86
121
|
}
|
87
122
|
|
88
123
|
static VALUE to_s(VALUE obj)
|
89
124
|
{
|
90
|
-
|
91
|
-
|
125
|
+
mmapscanner_t *ms;
|
126
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
127
|
+
size_t offset = ms->offset;
|
128
|
+
size_t size = ms->size;
|
92
129
|
VALUE data = rb_iv_get(obj, "data");
|
93
130
|
mmap_data_t *mdata;
|
94
131
|
|
@@ -110,30 +147,35 @@ static VALUE inspect(VALUE obj)
|
|
110
147
|
|
111
148
|
static VALUE pos(VALUE obj)
|
112
149
|
{
|
113
|
-
|
150
|
+
mmapscanner_t *ms;
|
151
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
152
|
+
return SIZET2NUM(ms->pos);
|
114
153
|
}
|
115
154
|
|
116
155
|
static VALUE set_pos(VALUE obj, VALUE pos)
|
117
156
|
{
|
157
|
+
mmapscanner_t *ms;
|
158
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
118
159
|
size_t p, size;
|
119
160
|
|
120
161
|
if (NUM2LL(pos) < 0)
|
121
162
|
rb_raise(rb_eRangeError, "out of range: %lld", NUM2LL(pos));
|
122
163
|
p = NUM2SIZET(pos);
|
123
|
-
size =
|
164
|
+
size = ms->size;
|
124
165
|
if (p > size)
|
125
166
|
rb_raise(rb_eRangeError, "out of range: %zu > %zu", p, size);
|
126
|
-
|
167
|
+
ms->pos = p;
|
127
168
|
return pos;
|
128
169
|
}
|
129
170
|
|
130
|
-
static VALUE scan_sub(VALUE obj, VALUE re, int forward)
|
171
|
+
static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeonly)
|
131
172
|
{
|
173
|
+
mmapscanner_t *ms;
|
174
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
132
175
|
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
133
176
|
regex_t *reg;
|
134
177
|
int tmpreg;
|
135
178
|
int result;
|
136
|
-
struct re_registers regs;
|
137
179
|
size_t old_pos, matched_len;
|
138
180
|
char *ptr;
|
139
181
|
size_t pos, size;
|
@@ -141,8 +183,8 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
|
|
141
183
|
mmap_data_t *mdata;
|
142
184
|
|
143
185
|
Check_Type(re, T_REGEXP);
|
144
|
-
pos =
|
145
|
-
size =
|
186
|
+
pos = ms->pos;
|
187
|
+
size = ms->size;
|
146
188
|
if (pos >= size)
|
147
189
|
return Qnil;
|
148
190
|
data = rb_iv_get(obj, "data");
|
@@ -152,17 +194,24 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
|
|
152
194
|
Data_Get_Struct(data, mmap_data_t, mdata);
|
153
195
|
ptr = mdata->ptr;
|
154
196
|
}
|
155
|
-
ptr +=
|
197
|
+
ptr += ms->offset;
|
156
198
|
|
157
199
|
reg = rb_reg_prepare_re(re, rb_str_new("", 0));
|
158
200
|
tmpreg = reg != RREGEXP(re)->ptr;
|
159
201
|
if (!tmpreg) RREGEXP(re)->usecnt++;
|
160
202
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
203
|
+
if (headonly) {
|
204
|
+
result = onig_match(reg, (UChar*)(ptr+pos),
|
205
|
+
(UChar*)(ptr+size),
|
206
|
+
(UChar*)(ptr+pos),
|
207
|
+
&ms->regs, ONIG_OPTION_NONE);
|
208
|
+
} else {
|
209
|
+
result = onig_search(reg, (UChar*)(ptr+pos),
|
210
|
+
(UChar*)(ptr+size),
|
211
|
+
(UChar*)(ptr+pos),
|
212
|
+
(UChar*)(ptr+size),
|
213
|
+
&ms->regs, ONIG_OPTION_NONE);
|
214
|
+
}
|
166
215
|
if (!tmpreg) RREGEXP(re)->usecnt--;
|
167
216
|
if (tmpreg) {
|
168
217
|
if (RREGEXP(re)->usecnt) {
|
@@ -175,65 +224,93 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
|
|
175
224
|
if (result < 0)
|
176
225
|
return Qnil;
|
177
226
|
old_pos = pos;
|
178
|
-
matched_len = regs.end[0];
|
227
|
+
matched_len = ms->regs.end[0];
|
179
228
|
if (forward) {
|
180
229
|
pos += matched_len;
|
181
|
-
|
230
|
+
ms->pos = pos;
|
182
231
|
}
|
183
|
-
|
232
|
+
ms->matched = 1;
|
233
|
+
ms->matched_pos = old_pos;
|
234
|
+
|
235
|
+
if (sizeonly)
|
236
|
+
return SIZET2NUM(matched_len);
|
237
|
+
return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(old_pos), SIZET2NUM(matched_len));
|
184
238
|
}
|
185
239
|
|
186
240
|
static VALUE scan(VALUE obj, VALUE re)
|
187
241
|
{
|
188
|
-
return scan_sub(obj, re, 1);
|
242
|
+
return scan_sub(obj, re, 1, 1, 0);
|
243
|
+
}
|
244
|
+
|
245
|
+
static VALUE scan_until(VALUE obj, VALUE re)
|
246
|
+
{
|
247
|
+
return scan_sub(obj, re, 1, 0, 0);
|
189
248
|
}
|
190
249
|
|
191
250
|
static VALUE check(VALUE obj, VALUE re)
|
192
251
|
{
|
193
|
-
return scan_sub(obj, re, 0);
|
252
|
+
return scan_sub(obj, re, 0, 1, 0);
|
194
253
|
}
|
195
254
|
|
196
255
|
static VALUE skip(VALUE obj, VALUE re)
|
197
256
|
{
|
198
|
-
|
199
|
-
if (ret == Qnil)
|
200
|
-
return ret;
|
201
|
-
return rb_iv_get(ret, "size");
|
257
|
+
return scan_sub(obj, re, 1, 1, 1);
|
202
258
|
}
|
203
259
|
|
204
260
|
static VALUE match_p(VALUE obj, VALUE re)
|
205
261
|
{
|
206
|
-
|
207
|
-
if (ret == Qnil)
|
208
|
-
return ret;
|
209
|
-
return rb_iv_get(ret, "size");
|
262
|
+
return scan_sub(obj, re, 0, 1, 1);
|
210
263
|
}
|
211
264
|
|
212
265
|
static VALUE peek(VALUE obj, VALUE size)
|
213
266
|
{
|
267
|
+
mmapscanner_t *ms;
|
268
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
214
269
|
size_t sz = NUM2SIZET(size);
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
sz = data_size - data_pos;
|
219
|
-
return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(data_pos), SIZET2NUM(sz));
|
270
|
+
if (sz > ms->size - ms->pos)
|
271
|
+
sz = ms->size - ms->pos;
|
272
|
+
return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(ms->pos), SIZET2NUM(sz));
|
220
273
|
}
|
221
274
|
|
222
275
|
static VALUE eos_p(VALUE obj)
|
223
276
|
{
|
224
|
-
|
225
|
-
|
226
|
-
return
|
277
|
+
mmapscanner_t *ms;
|
278
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
279
|
+
return ms->pos >= ms->size ? Qtrue : Qfalse;
|
227
280
|
}
|
228
281
|
|
229
282
|
static VALUE rest(VALUE obj)
|
230
283
|
{
|
231
|
-
|
284
|
+
mmapscanner_t *ms;
|
285
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
286
|
+
return rb_funcall(cMmapScanner, rb_intern("new"), 2, obj, SIZET2NUM(ms->pos));
|
287
|
+
}
|
288
|
+
|
289
|
+
static VALUE matched(int argc, VALUE *argv, VALUE obj)
|
290
|
+
{
|
291
|
+
mmapscanner_t *ms;
|
292
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
293
|
+
VALUE nth;
|
294
|
+
int i = 0;
|
295
|
+
size_t pos, len;
|
296
|
+
|
297
|
+
if (rb_scan_args(argc, argv, "01", &nth) == 1)
|
298
|
+
i = NUM2LONG(nth);
|
299
|
+
if (ms->matched == 0)
|
300
|
+
return Qnil;
|
301
|
+
if (i < 0)
|
302
|
+
return Qnil;
|
303
|
+
if (i >= ms->regs.num_regs)
|
304
|
+
return Qnil;
|
305
|
+
pos = ms->matched_pos + ms->regs.beg[i];
|
306
|
+
len = ms->regs.end[i] - ms->regs.beg[i];
|
307
|
+
return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(pos), SIZET2NUM(len));
|
232
308
|
}
|
233
309
|
|
234
310
|
void Init_mmapscanner(void)
|
235
311
|
{
|
236
312
|
cMmapScanner = rb_define_class("MmapScanner", rb_cObject);
|
313
|
+
rb_define_alloc_func(cMmapScanner, allocate);
|
237
314
|
rb_define_method(cMmapScanner, "initialize", initialize, -1);
|
238
315
|
rb_define_method(cMmapScanner, "size", size, 0);
|
239
316
|
rb_define_method(cMmapScanner, "length", size, 0);
|
@@ -244,12 +321,14 @@ void Init_mmapscanner(void)
|
|
244
321
|
rb_define_method(cMmapScanner, "pos", pos, 0);
|
245
322
|
rb_define_method(cMmapScanner, "pos=", set_pos, 1);
|
246
323
|
rb_define_method(cMmapScanner, "scan", scan, 1);
|
324
|
+
rb_define_method(cMmapScanner, "scan_until", scan_until, 1);
|
247
325
|
rb_define_method(cMmapScanner, "check", check, 1);
|
248
326
|
rb_define_method(cMmapScanner, "skip", skip, 1);
|
249
327
|
rb_define_method(cMmapScanner, "match?", match_p, 1);
|
250
328
|
rb_define_method(cMmapScanner, "peek", peek, 1);
|
251
329
|
rb_define_method(cMmapScanner, "eos?", eos_p, 0);
|
252
330
|
rb_define_method(cMmapScanner, "rest", rest, 0);
|
331
|
+
rb_define_method(cMmapScanner, "matched", matched, -1);
|
253
332
|
|
254
333
|
cMmap = rb_define_class_under(cMmapScanner, "Mmap", rb_cObject);
|
255
334
|
}
|
data/spec/mmapscanner_spec.rb
CHANGED
@@ -51,6 +51,21 @@ describe MmapScanner do
|
|
51
51
|
subject.pos.should == 10
|
52
52
|
end
|
53
53
|
end
|
54
|
+
describe '#scan_until' do
|
55
|
+
it 'returns matched data as MmapScanner' do
|
56
|
+
subject.scan(/012/)
|
57
|
+
ret = subject.scan_until(/678/)
|
58
|
+
ret.class.should == MmapScanner
|
59
|
+
ret.to_s.should == '345678'
|
60
|
+
end
|
61
|
+
it 'returns nil if not matched' do
|
62
|
+
subject.scan_until(/321/).should be_nil
|
63
|
+
end
|
64
|
+
it 'forward current position' do
|
65
|
+
subject.scan_until(/456/)
|
66
|
+
subject.pos.should == 7
|
67
|
+
end
|
68
|
+
end
|
54
69
|
describe '#check' do
|
55
70
|
it 'returns matched data as MmapScanner' do
|
56
71
|
ret = subject.check(/\d{10}/)
|
@@ -89,6 +104,30 @@ describe MmapScanner do
|
|
89
104
|
subject.pos.should == 0
|
90
105
|
end
|
91
106
|
end
|
107
|
+
describe '#matched' do
|
108
|
+
it 'returns matched data after scan' do
|
109
|
+
subject.scan(/\d{6}/)
|
110
|
+
subject.matched.to_s.should == '012345'
|
111
|
+
end
|
112
|
+
it 'returns matched data after scan_until' do
|
113
|
+
subject.scan_until(/4567/)
|
114
|
+
subject.matched.to_s.should == '4567'
|
115
|
+
end
|
116
|
+
it 'returns nil if there is not matched data' do
|
117
|
+
subject.matched.should be_nil
|
118
|
+
end
|
119
|
+
end
|
120
|
+
describe '#matched(nth)' do
|
121
|
+
it 'returns nth part of matched string' do
|
122
|
+
subject.scan(/(..)(..)(..)/)
|
123
|
+
subject.matched(0).to_s.should == '012345'
|
124
|
+
subject.matched(1).to_s.should == '01'
|
125
|
+
subject.matched(2).to_s.should == '23'
|
126
|
+
subject.matched(3).to_s.should == '45'
|
127
|
+
subject.matched(4).should be_nil
|
128
|
+
subject.matched(-1).should be_nil
|
129
|
+
end
|
130
|
+
end
|
92
131
|
describe '#peek' do
|
93
132
|
it 'returns MmapScanner' do
|
94
133
|
subject.peek(10).should be_instance_of MmapScanner
|
@@ -115,6 +154,10 @@ describe MmapScanner do
|
|
115
154
|
ret.should be_instance_of MmapScanner
|
116
155
|
ret.to_s.should == '789'
|
117
156
|
end
|
157
|
+
it 'returns empty MmapScanner if it reached to end' do
|
158
|
+
subject.pos = 10000
|
159
|
+
subject.rest.to_s.should == ''
|
160
|
+
end
|
118
161
|
end
|
119
162
|
describe '.new with position' do
|
120
163
|
it '#size is length of rest data' do
|
@@ -152,6 +195,13 @@ describe MmapScanner do
|
|
152
195
|
let(:src){'0123456789'*1020}
|
153
196
|
subject{MmapScanner.new(src, 100, 10000)}
|
154
197
|
it_should_behave_like 'MmapScanner'
|
198
|
+
describe '.new with empty source' do
|
199
|
+
it 'returns empty MmapScanner' do
|
200
|
+
m = MmapScanner.new('')
|
201
|
+
m.size.should == 0
|
202
|
+
m.to_s.should be_empty
|
203
|
+
end
|
204
|
+
end
|
155
205
|
end
|
156
206
|
|
157
207
|
context 'with MmapScanner' do
|
@@ -163,5 +213,12 @@ describe MmapScanner do
|
|
163
213
|
let(:src){MmapScanner.new(@file)}
|
164
214
|
subject{MmapScanner.new(src, 100, 10000)}
|
165
215
|
it_should_behave_like 'MmapScanner'
|
216
|
+
describe '.new with empty source' do
|
217
|
+
it 'returns empty MmapScanner' do
|
218
|
+
m = MmapScanner.new(src, 1020, 0)
|
219
|
+
m.size.should == 0
|
220
|
+
m.to_s.should be_empty
|
221
|
+
end
|
222
|
+
end
|
166
223
|
end
|
167
224
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: mmapscanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: "0.2"
|
5
|
+
version: "0.3"
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- TOMITA Masahiro
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-03-
|
13
|
+
date: 2011-03-21 00:00:00 +09:00
|
14
14
|
default_executable:
|
15
15
|
dependencies: []
|
16
16
|
|