mmapscanner 0.3.7 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +14 -0
- data/ext/mmapscanner.c +65 -13
- data/spec/mmapscanner_spec.rb +117 -0
- metadata +4 -3
data/README.md
CHANGED
@@ -46,8 +46,21 @@ Usage
|
|
46
46
|
* scan はポインタ位置で正規表現との一致を試みます。一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
|
47
47
|
* scan_until は scan と同じですが、現在のポインタの位置以降で一致を試みます。
|
48
48
|
* check は scan と同じですが、ポインタを進めません。
|
49
|
+
* check_until は check と同じですが、現在のポインタの位置以降で一致を試みます。
|
49
50
|
* skip は scan と同じですが、一致したバイト数を返します。
|
51
|
+
* skip_until は skip と同じですが、現在のポインタの位置以降で一致を試みます。
|
50
52
|
* match? は check と同じですが、一致したバイト数を返します。
|
53
|
+
* exist? は match? と同じですが、現在のポインタの位置以降で一致を試みます。
|
54
|
+
* scan_full(re, s, f) はポインタの位置でスキャンします。
|
55
|
+
* scan_full(re, true, true) は scan(re) と同じです。
|
56
|
+
* scan_full(re, true, false) は skip(re) と同じです。
|
57
|
+
* scan_full(re, false, true) は check(re) と同じです。
|
58
|
+
* scan_full(re, false, false) は match?(re) と同じです。
|
59
|
+
* search_full(re, s, f) はポインタの位置以降でスキャンします。
|
60
|
+
* search_full(re, true, true) は scan_until(re) と同じです。
|
61
|
+
* search_full(re, true, false) は skip_until(re) と同じです。
|
62
|
+
* search_full(re, false, true) は check_until(re) と同じです。
|
63
|
+
* search_full(re, false, false) は exist?(re) と同じです。
|
51
64
|
* peek は指定したバイト数分のデータを返します。ポインタは進みません。
|
52
65
|
* eos? はポインタが末尾に達していると true を返します。
|
53
66
|
* rest はポインタ以降のデータを返します。
|
@@ -56,6 +69,7 @@ Usage
|
|
56
69
|
* matched_str は matched と同じですが、文字列を返します。
|
57
70
|
* pos は現在のポインタの位置を返します。
|
58
71
|
* pos= でポインタ位置を変更することができます。
|
72
|
+
* terminate はポインタを末尾に移動します。
|
59
73
|
|
60
74
|
Copyright
|
61
75
|
---------
|
data/ext/mmapscanner.c
CHANGED
@@ -53,12 +53,16 @@ static VALUE mmap_allocate(VALUE klass)
|
|
53
53
|
static VALUE mmap_initialize(int argc, VALUE *argv, VALUE obj)
|
54
54
|
{
|
55
55
|
mmap_data_t *data;
|
56
|
-
Data_Get_Struct(obj, mmap_data_t, data);
|
57
|
-
if (data->ptr)
|
58
|
-
rb_raise(rb_eRuntimeError, "already mapped");
|
59
56
|
VALUE file, voffset, vlength;
|
60
57
|
off_t offset = 0;
|
61
58
|
size_t length = 0;
|
59
|
+
int fd;
|
60
|
+
struct stat st;
|
61
|
+
void *ptr;
|
62
|
+
|
63
|
+
Data_Get_Struct(obj, mmap_data_t, data);
|
64
|
+
if (data->ptr)
|
65
|
+
rb_raise(rb_eRuntimeError, "already mapped");
|
62
66
|
rb_scan_args(argc, argv, "12", &file, &voffset, &vlength);
|
63
67
|
if (TYPE(file) != T_FILE)
|
64
68
|
rb_raise(rb_eTypeError, "File object required");
|
@@ -66,15 +70,13 @@ static VALUE mmap_initialize(int argc, VALUE *argv, VALUE obj)
|
|
66
70
|
rb_raise(rb_eRangeError, "offset out of range: %lld", NUM2LL(voffset));
|
67
71
|
if (vlength != Qnil && NUM2LL(vlength) < 0)
|
68
72
|
rb_raise(rb_eRangeError, "length out of range: %lld", NUM2LL(vlength));
|
69
|
-
|
70
|
-
struct stat st;
|
73
|
+
fd = FIX2INT(rb_funcall(file, rb_intern("fileno"), 0));
|
71
74
|
if (fstat(fd, &st) < 0)
|
72
75
|
rb_sys_fail("fstat");
|
73
76
|
offset = voffset == Qnil ? 0 : NUM2SIZET(voffset);
|
74
77
|
length = vlength == Qnil ? st.st_size : NUM2SIZET(vlength);
|
75
78
|
if (offset + length > st.st_size)
|
76
79
|
length = st.st_size - offset;
|
77
|
-
void *ptr;
|
78
80
|
if ((ptr = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset)) == MAP_FAILED)
|
79
81
|
rb_sys_fail("mmap");
|
80
82
|
|
@@ -159,7 +161,16 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
|
|
159
161
|
src = ms->data;
|
160
162
|
src_size_defined = 1;
|
161
163
|
} else if (TYPE(src) == T_FILE) {
|
162
|
-
|
164
|
+
int fd = FIX2INT(rb_funcall(src, rb_intern("fileno"), 0));
|
165
|
+
struct stat st;
|
166
|
+
if (fstat(fd, &st) < 0)
|
167
|
+
rb_sys_fail("fstat");
|
168
|
+
if (st.st_size == 0) {
|
169
|
+
src = rb_str_new(NULL, 0);
|
170
|
+
src_size = 0;
|
171
|
+
} else {
|
172
|
+
src = rb_funcall(cMmap, rb_intern("new"), 1, src);
|
173
|
+
}
|
163
174
|
}
|
164
175
|
if (rb_obj_class(src) == cMmap) {
|
165
176
|
if (!src_size_defined) {
|
@@ -260,8 +271,8 @@ static VALUE pos(VALUE obj)
|
|
260
271
|
static VALUE set_pos(VALUE obj, VALUE pos)
|
261
272
|
{
|
262
273
|
mmapscanner_t *ms;
|
263
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
264
274
|
size_t p, size;
|
275
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
265
276
|
|
266
277
|
if (NUM2LL(pos) < 0)
|
267
278
|
rb_raise(rb_eRangeError, "out of range: %lld", NUM2LL(pos));
|
@@ -276,7 +287,6 @@ static VALUE set_pos(VALUE obj, VALUE pos)
|
|
276
287
|
static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeonly)
|
277
288
|
{
|
278
289
|
mmapscanner_t *ms;
|
279
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
280
290
|
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
281
291
|
regex_t *reg;
|
282
292
|
int tmpreg;
|
@@ -285,6 +295,7 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeon
|
|
285
295
|
char *ptr;
|
286
296
|
mmap_data_t *mdata;
|
287
297
|
|
298
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
288
299
|
ms->matched = 0;
|
289
300
|
Check_Type(re, T_REGEXP);
|
290
301
|
if (ms->pos > ms->size)
|
@@ -355,6 +366,18 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeon
|
|
355
366
|
return create_from_mmapscanner(obj, old_pos, matched_len);
|
356
367
|
}
|
357
368
|
|
369
|
+
static VALUE scan_full(VALUE obj, VALUE re, VALUE forward, VALUE ret_ms)
|
370
|
+
{
|
371
|
+
return scan_sub(obj, re, (forward != Qnil && forward != Qfalse), 1,
|
372
|
+
(ret_ms == Qnil || ret_ms == Qfalse));
|
373
|
+
}
|
374
|
+
|
375
|
+
static VALUE search_full(VALUE obj, VALUE re, VALUE forward, VALUE ret_ms)
|
376
|
+
{
|
377
|
+
return scan_sub(obj, re, (forward != Qnil && forward != Qfalse), 0,
|
378
|
+
(ret_ms == Qnil || ret_ms == Qfalse));
|
379
|
+
}
|
380
|
+
|
358
381
|
static VALUE scan(VALUE obj, VALUE re)
|
359
382
|
{
|
360
383
|
return scan_sub(obj, re, 1, 1, 0);
|
@@ -370,21 +393,36 @@ static VALUE check(VALUE obj, VALUE re)
|
|
370
393
|
return scan_sub(obj, re, 0, 1, 0);
|
371
394
|
}
|
372
395
|
|
396
|
+
static VALUE check_until(VALUE obj, VALUE re)
|
397
|
+
{
|
398
|
+
return scan_sub(obj, re, 0, 0, 0);
|
399
|
+
}
|
400
|
+
|
373
401
|
static VALUE skip(VALUE obj, VALUE re)
|
374
402
|
{
|
375
403
|
return scan_sub(obj, re, 1, 1, 1);
|
376
404
|
}
|
377
405
|
|
406
|
+
static VALUE skip_until(VALUE obj, VALUE re)
|
407
|
+
{
|
408
|
+
return scan_sub(obj, re, 1, 0, 1);
|
409
|
+
}
|
410
|
+
|
378
411
|
static VALUE match_p(VALUE obj, VALUE re)
|
379
412
|
{
|
380
413
|
return scan_sub(obj, re, 0, 1, 1);
|
381
414
|
}
|
382
415
|
|
416
|
+
static VALUE exist_p(VALUE obj, VALUE re)
|
417
|
+
{
|
418
|
+
return scan_sub(obj, re, 0, 0, 1);
|
419
|
+
}
|
420
|
+
|
383
421
|
static VALUE peek(VALUE obj, VALUE size)
|
384
422
|
{
|
385
423
|
mmapscanner_t *ms;
|
386
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
387
424
|
size_t sz = NUM2SIZET(size);
|
425
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
388
426
|
if (sz > ms->size - ms->pos)
|
389
427
|
sz = ms->size - ms->pos;
|
390
428
|
return create_from_mmapscanner(obj, ms->pos, sz);
|
@@ -404,7 +442,7 @@ static VALUE rest(VALUE obj)
|
|
404
442
|
return create_from_mmapscanner(obj, ms->pos, ms->size - ms->pos);
|
405
443
|
}
|
406
444
|
|
407
|
-
static int matched_sub(int argc, VALUE *argv, mmapscanner_t *ms, int *pos, int *len)
|
445
|
+
static int matched_sub(int argc, VALUE *argv, mmapscanner_t *ms, size_t *pos, size_t *len)
|
408
446
|
{
|
409
447
|
int i = 0;
|
410
448
|
if (ms->matched == 0)
|
@@ -429,8 +467,8 @@ static int matched_sub(int argc, VALUE *argv, mmapscanner_t *ms, int *pos, int *
|
|
429
467
|
static VALUE matched(int argc, VALUE *argv, VALUE obj)
|
430
468
|
{
|
431
469
|
mmapscanner_t *ms;
|
432
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
433
470
|
size_t pos, len;
|
471
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
434
472
|
if (matched_sub(argc, argv, ms, &pos, &len) == 0)
|
435
473
|
return Qnil;
|
436
474
|
return create_from_mmapscanner(obj, pos, len);
|
@@ -439,9 +477,9 @@ static VALUE matched(int argc, VALUE *argv, VALUE obj)
|
|
439
477
|
static VALUE matched_str(int argc, VALUE *argv, VALUE obj)
|
440
478
|
{
|
441
479
|
mmapscanner_t *ms;
|
442
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
443
480
|
mmap_data_t *mdata;
|
444
481
|
size_t pos, len;
|
482
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
445
483
|
if (matched_sub(argc, argv, ms, &pos, &len) == 0)
|
446
484
|
return Qnil;
|
447
485
|
if (TYPE(ms->data) == T_STRING)
|
@@ -452,6 +490,14 @@ static VALUE matched_str(int argc, VALUE *argv, VALUE obj)
|
|
452
490
|
return rb_str_new(mdata->ptr+ms->offset+pos, len);
|
453
491
|
}
|
454
492
|
|
493
|
+
static VALUE terminate(VALUE obj)
|
494
|
+
{
|
495
|
+
mmapscanner_t *ms;
|
496
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
497
|
+
ms->pos = ms->size;
|
498
|
+
return obj;
|
499
|
+
}
|
500
|
+
|
455
501
|
void Init_mmapscanner(void)
|
456
502
|
{
|
457
503
|
cMmapScanner = rb_define_class("MmapScanner", rb_cObject);
|
@@ -466,16 +512,22 @@ void Init_mmapscanner(void)
|
|
466
512
|
rb_define_method(cMmapScanner, "inspect", inspect, 0);
|
467
513
|
rb_define_method(cMmapScanner, "pos", pos, 0);
|
468
514
|
rb_define_method(cMmapScanner, "pos=", set_pos, 1);
|
515
|
+
rb_define_method(cMmapScanner, "scan_full", scan_full, 3);
|
516
|
+
rb_define_method(cMmapScanner, "search_full", search_full, 3);
|
469
517
|
rb_define_method(cMmapScanner, "scan", scan, 1);
|
470
518
|
rb_define_method(cMmapScanner, "scan_until", scan_until, 1);
|
471
519
|
rb_define_method(cMmapScanner, "check", check, 1);
|
520
|
+
rb_define_method(cMmapScanner, "check_until", check_until, 1);
|
472
521
|
rb_define_method(cMmapScanner, "skip", skip, 1);
|
522
|
+
rb_define_method(cMmapScanner, "skip_until", skip_until, 1);
|
473
523
|
rb_define_method(cMmapScanner, "match?", match_p, 1);
|
524
|
+
rb_define_method(cMmapScanner, "exist?", exist_p, 1);
|
474
525
|
rb_define_method(cMmapScanner, "peek", peek, 1);
|
475
526
|
rb_define_method(cMmapScanner, "eos?", eos_p, 0);
|
476
527
|
rb_define_method(cMmapScanner, "rest", rest, 0);
|
477
528
|
rb_define_method(cMmapScanner, "matched", matched, -1);
|
478
529
|
rb_define_method(cMmapScanner, "matched_str", matched_str, -1);
|
530
|
+
rb_define_method(cMmapScanner, "terminate", terminate, 0);
|
479
531
|
|
480
532
|
cMmap = rb_define_class_under(cMmapScanner, "Mmap", rb_cObject);
|
481
533
|
rb_define_alloc_func(cMmap, mmap_allocate);
|
data/spec/mmapscanner_spec.rb
CHANGED
@@ -105,6 +105,20 @@ describe MmapScanner do
|
|
105
105
|
subject.pos.should == 0
|
106
106
|
end
|
107
107
|
end
|
108
|
+
describe '#check_until' do
|
109
|
+
it 'returns matched data as MmapScanner' do
|
110
|
+
ret = subject.check_until(/123/)
|
111
|
+
ret.class.should == MmapScanner
|
112
|
+
ret.to_s.should == '0123'
|
113
|
+
end
|
114
|
+
it 'returns nil if not matched' do
|
115
|
+
subject.check_until(/abc/).should be_nil
|
116
|
+
end
|
117
|
+
it 'do not forward current position' do
|
118
|
+
ret = subject.check_until(/123/)
|
119
|
+
subject.pos.should == 0
|
120
|
+
end
|
121
|
+
end
|
108
122
|
describe '#skip' do
|
109
123
|
it 'returns length of matched data' do
|
110
124
|
subject.skip(/\d{10}/).should == 10
|
@@ -117,6 +131,18 @@ describe MmapScanner do
|
|
117
131
|
subject.pos.should == 10
|
118
132
|
end
|
119
133
|
end
|
134
|
+
describe '#skip_until' do
|
135
|
+
it 'returns length of matched data' do
|
136
|
+
subject.skip_until(/123/).should == 4
|
137
|
+
end
|
138
|
+
it 'returns nil if not matched' do
|
139
|
+
subject.skip_until(/abc/).should be_nil
|
140
|
+
end
|
141
|
+
it 'forward current position' do
|
142
|
+
subject.skip_until(/123/)
|
143
|
+
subject.pos.should == 4
|
144
|
+
end
|
145
|
+
end
|
120
146
|
describe '#match?' do
|
121
147
|
it 'returns length of matched data' do
|
122
148
|
subject.match?(/\d{10}/).should == 10
|
@@ -129,6 +155,18 @@ describe MmapScanner do
|
|
129
155
|
subject.pos.should == 0
|
130
156
|
end
|
131
157
|
end
|
158
|
+
describe '#exist?' do
|
159
|
+
it 'returns length of matched data' do
|
160
|
+
subject.exist?(/123/).should == 4
|
161
|
+
end
|
162
|
+
it 'returns nil if not matched' do
|
163
|
+
subject.exist?(/abc/).should be_nil
|
164
|
+
end
|
165
|
+
it 'do not forward current position' do
|
166
|
+
subject.exist?(/123/)
|
167
|
+
subject.pos.should == 0
|
168
|
+
end
|
169
|
+
end
|
132
170
|
describe '#matched' do
|
133
171
|
it 'returns matched data after scan' do
|
134
172
|
subject.scan(/\d{6}/)
|
@@ -173,6 +211,64 @@ describe MmapScanner do
|
|
173
211
|
subject.eos?.should == false
|
174
212
|
end
|
175
213
|
end
|
214
|
+
describe '#scan_full(re, true, true)' do
|
215
|
+
it 'is same as #scan' do
|
216
|
+
ret = subject.scan_full(/\d{10}/, true, true)
|
217
|
+
ret.class.should == MmapScanner
|
218
|
+
ret.to_s.should == '0123456789'
|
219
|
+
subject.pos.should == 10
|
220
|
+
end
|
221
|
+
end
|
222
|
+
describe '#scan_full(re, true, false)' do
|
223
|
+
it 'is same as #skip' do
|
224
|
+
ret = subject.scan_full(/\d{10}/, true, false)
|
225
|
+
ret.should == 10
|
226
|
+
subject.pos.should == 10
|
227
|
+
end
|
228
|
+
end
|
229
|
+
describe '#scan_full(re, false, true)' do
|
230
|
+
it 'is same as #check' do
|
231
|
+
ret = subject.scan_full(/\d{10}/, false, true)
|
232
|
+
ret.to_s.should == '0123456789'
|
233
|
+
subject.pos.should == 0
|
234
|
+
end
|
235
|
+
end
|
236
|
+
describe '#scan_full(re, false, false)' do
|
237
|
+
it 'is same as #match?' do
|
238
|
+
ret = subject.scan_full(/\d{10}/, false, false)
|
239
|
+
ret.should == 10
|
240
|
+
subject.pos.should == 0
|
241
|
+
end
|
242
|
+
end
|
243
|
+
describe '#search_full(re, true, true)' do
|
244
|
+
it 'is same as #scan_until' do
|
245
|
+
ret = subject.search_full(/789/, true, true)
|
246
|
+
ret.class.should == MmapScanner
|
247
|
+
ret.to_s.should == '0123456789'
|
248
|
+
subject.pos.should == 10
|
249
|
+
end
|
250
|
+
end
|
251
|
+
describe '#search_full(re, true, false)' do
|
252
|
+
it 'is same as #skip_until' do
|
253
|
+
ret = subject.search_full(/789/, true, false)
|
254
|
+
ret.should == 10
|
255
|
+
subject.pos.should == 10
|
256
|
+
end
|
257
|
+
end
|
258
|
+
describe '#search_full(re, false, true)' do
|
259
|
+
it 'is same as #check_until' do
|
260
|
+
ret = subject.search_full(/789/, false, true)
|
261
|
+
ret.to_s.should == '0123456789'
|
262
|
+
subject.pos.should == 0
|
263
|
+
end
|
264
|
+
end
|
265
|
+
describe '#search_full(re, false, false)' do
|
266
|
+
it 'is same as #exist?' do
|
267
|
+
ret = subject.search_full(/789/, false, false)
|
268
|
+
ret.should == 10
|
269
|
+
subject.pos.should == 0
|
270
|
+
end
|
271
|
+
end
|
176
272
|
describe '#rest' do
|
177
273
|
it 'returns rest data as MmapScanner' do
|
178
274
|
subject.pos = 9997
|
@@ -185,6 +281,12 @@ describe MmapScanner do
|
|
185
281
|
subject.rest.to_s.should == ''
|
186
282
|
end
|
187
283
|
end
|
284
|
+
describe '#terminate' do
|
285
|
+
it 'set position to end of MmapScanner area' do
|
286
|
+
subject.terminate.should == subject
|
287
|
+
subject.pos.should == 10000
|
288
|
+
end
|
289
|
+
end
|
188
290
|
describe '.new with position' do
|
189
291
|
it '#size is length of rest data' do
|
190
292
|
if src.respond_to? :size
|
@@ -220,6 +322,21 @@ describe MmapScanner do
|
|
220
322
|
subject.data.should be_kind_of MmapScanner::Mmap
|
221
323
|
end
|
222
324
|
end
|
325
|
+
context 'empty file' do
|
326
|
+
before do
|
327
|
+
tmpf = Tempfile.new 'mmapscanner_empty_file'
|
328
|
+
@file = File.open(tmpf.path)
|
329
|
+
end
|
330
|
+
it '#size returns 0' do
|
331
|
+
subject.size.should == 0
|
332
|
+
end
|
333
|
+
it '#to_s returns empty String' do
|
334
|
+
subject.to_s.should == ''
|
335
|
+
end
|
336
|
+
it '#eos? returns true' do
|
337
|
+
subject.eos?.should == true
|
338
|
+
end
|
339
|
+
end
|
223
340
|
end
|
224
341
|
|
225
342
|
context 'with String' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mmapscanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.3.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-07-19 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description:
|
15
15
|
email: tommy@tmtm.org
|
@@ -43,9 +43,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
43
43
|
version: '0'
|
44
44
|
requirements: []
|
45
45
|
rubyforge_project:
|
46
|
-
rubygems_version: 1.8.
|
46
|
+
rubygems_version: 1.8.23
|
47
47
|
signing_key:
|
48
48
|
specification_version: 3
|
49
49
|
summary: MmapScanner like StringScanner but it use mmap(2)-ed data
|
50
50
|
test_files:
|
51
51
|
- spec/mmapscanner_spec.rb
|
52
|
+
has_rdoc:
|