mmapscanner 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -0
- data/ext/mmapscanner.c +65 -13
- data/spec/mmapscanner_spec.rb +117 -0
- metadata +4 -3
data/README.md
CHANGED
@@ -46,8 +46,21 @@ Usage
|
|
46
46
|
* scan はポインタ位置で正規表現との一致を試みます。一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
|
47
47
|
* scan_until は scan と同じですが、現在のポインタの位置以降で一致を試みます。
|
48
48
|
* check は scan と同じですが、ポインタを進めません。
|
49
|
+
* check_until は check と同じですが、現在のポインタの位置以降で一致を試みます。
|
49
50
|
* skip は scan と同じですが、一致したバイト数を返します。
|
51
|
+
* skip_until は skip と同じですが、現在のポインタの位置以降で一致を試みます。
|
50
52
|
* match? は check と同じですが、一致したバイト数を返します。
|
53
|
+
* exist? は match? と同じですが、現在のポインタの位置以降で一致を試みます。
|
54
|
+
* scan_full(re, s, f) はポインタの位置でスキャンします。
|
55
|
+
* scan_full(re, true, true) は scan(re) と同じです。
|
56
|
+
* scan_full(re, true, false) は skip(re) と同じです。
|
57
|
+
* scan_full(re, false, true) は check(re) と同じです。
|
58
|
+
* scan_full(re, false, false) は match?(re) と同じです。
|
59
|
+
* search_full(re, s, f) はポインタの位置以降でスキャンします。
|
60
|
+
* search_full(re, true, true) は scan_until(re) と同じです。
|
61
|
+
* search_full(re, true, false) は skip_until(re) と同じです。
|
62
|
+
* search_full(re, false, true) は check_until(re) と同じです。
|
63
|
+
* search_full(re, false, false) は exist?(re) と同じです。
|
51
64
|
* peek は指定したバイト数分のデータを返します。ポインタは進みません。
|
52
65
|
* eos? はポインタが末尾に達していると true を返します。
|
53
66
|
* rest はポインタ以降のデータを返します。
|
@@ -56,6 +69,7 @@ Usage
|
|
56
69
|
* matched_str は matched と同じですが、文字列を返します。
|
57
70
|
* pos は現在のポインタの位置を返します。
|
58
71
|
* pos= でポインタ位置を変更することができます。
|
72
|
+
* terminate はポインタを末尾に移動します。
|
59
73
|
|
60
74
|
Copyright
|
61
75
|
---------
|
data/ext/mmapscanner.c
CHANGED
@@ -53,12 +53,16 @@ static VALUE mmap_allocate(VALUE klass)
|
|
53
53
|
static VALUE mmap_initialize(int argc, VALUE *argv, VALUE obj)
|
54
54
|
{
|
55
55
|
mmap_data_t *data;
|
56
|
-
Data_Get_Struct(obj, mmap_data_t, data);
|
57
|
-
if (data->ptr)
|
58
|
-
rb_raise(rb_eRuntimeError, "already mapped");
|
59
56
|
VALUE file, voffset, vlength;
|
60
57
|
off_t offset = 0;
|
61
58
|
size_t length = 0;
|
59
|
+
int fd;
|
60
|
+
struct stat st;
|
61
|
+
void *ptr;
|
62
|
+
|
63
|
+
Data_Get_Struct(obj, mmap_data_t, data);
|
64
|
+
if (data->ptr)
|
65
|
+
rb_raise(rb_eRuntimeError, "already mapped");
|
62
66
|
rb_scan_args(argc, argv, "12", &file, &voffset, &vlength);
|
63
67
|
if (TYPE(file) != T_FILE)
|
64
68
|
rb_raise(rb_eTypeError, "File object required");
|
@@ -66,15 +70,13 @@ static VALUE mmap_initialize(int argc, VALUE *argv, VALUE obj)
|
|
66
70
|
rb_raise(rb_eRangeError, "offset out of range: %lld", NUM2LL(voffset));
|
67
71
|
if (vlength != Qnil && NUM2LL(vlength) < 0)
|
68
72
|
rb_raise(rb_eRangeError, "length out of range: %lld", NUM2LL(vlength));
|
69
|
-
|
70
|
-
struct stat st;
|
73
|
+
fd = FIX2INT(rb_funcall(file, rb_intern("fileno"), 0));
|
71
74
|
if (fstat(fd, &st) < 0)
|
72
75
|
rb_sys_fail("fstat");
|
73
76
|
offset = voffset == Qnil ? 0 : NUM2SIZET(voffset);
|
74
77
|
length = vlength == Qnil ? st.st_size : NUM2SIZET(vlength);
|
75
78
|
if (offset + length > st.st_size)
|
76
79
|
length = st.st_size - offset;
|
77
|
-
void *ptr;
|
78
80
|
if ((ptr = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset)) == MAP_FAILED)
|
79
81
|
rb_sys_fail("mmap");
|
80
82
|
|
@@ -159,7 +161,16 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
|
|
159
161
|
src = ms->data;
|
160
162
|
src_size_defined = 1;
|
161
163
|
} else if (TYPE(src) == T_FILE) {
|
162
|
-
|
164
|
+
int fd = FIX2INT(rb_funcall(src, rb_intern("fileno"), 0));
|
165
|
+
struct stat st;
|
166
|
+
if (fstat(fd, &st) < 0)
|
167
|
+
rb_sys_fail("fstat");
|
168
|
+
if (st.st_size == 0) {
|
169
|
+
src = rb_str_new(NULL, 0);
|
170
|
+
src_size = 0;
|
171
|
+
} else {
|
172
|
+
src = rb_funcall(cMmap, rb_intern("new"), 1, src);
|
173
|
+
}
|
163
174
|
}
|
164
175
|
if (rb_obj_class(src) == cMmap) {
|
165
176
|
if (!src_size_defined) {
|
@@ -260,8 +271,8 @@ static VALUE pos(VALUE obj)
|
|
260
271
|
static VALUE set_pos(VALUE obj, VALUE pos)
|
261
272
|
{
|
262
273
|
mmapscanner_t *ms;
|
263
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
264
274
|
size_t p, size;
|
275
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
265
276
|
|
266
277
|
if (NUM2LL(pos) < 0)
|
267
278
|
rb_raise(rb_eRangeError, "out of range: %lld", NUM2LL(pos));
|
@@ -276,7 +287,6 @@ static VALUE set_pos(VALUE obj, VALUE pos)
|
|
276
287
|
static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeonly)
|
277
288
|
{
|
278
289
|
mmapscanner_t *ms;
|
279
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
280
290
|
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
281
291
|
regex_t *reg;
|
282
292
|
int tmpreg;
|
@@ -285,6 +295,7 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeon
|
|
285
295
|
char *ptr;
|
286
296
|
mmap_data_t *mdata;
|
287
297
|
|
298
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
288
299
|
ms->matched = 0;
|
289
300
|
Check_Type(re, T_REGEXP);
|
290
301
|
if (ms->pos > ms->size)
|
@@ -355,6 +366,18 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeon
|
|
355
366
|
return create_from_mmapscanner(obj, old_pos, matched_len);
|
356
367
|
}
|
357
368
|
|
369
|
+
static VALUE scan_full(VALUE obj, VALUE re, VALUE forward, VALUE ret_ms)
|
370
|
+
{
|
371
|
+
return scan_sub(obj, re, (forward != Qnil && forward != Qfalse), 1,
|
372
|
+
(ret_ms == Qnil || ret_ms == Qfalse));
|
373
|
+
}
|
374
|
+
|
375
|
+
static VALUE search_full(VALUE obj, VALUE re, VALUE forward, VALUE ret_ms)
|
376
|
+
{
|
377
|
+
return scan_sub(obj, re, (forward != Qnil && forward != Qfalse), 0,
|
378
|
+
(ret_ms == Qnil || ret_ms == Qfalse));
|
379
|
+
}
|
380
|
+
|
358
381
|
static VALUE scan(VALUE obj, VALUE re)
|
359
382
|
{
|
360
383
|
return scan_sub(obj, re, 1, 1, 0);
|
@@ -370,21 +393,36 @@ static VALUE check(VALUE obj, VALUE re)
|
|
370
393
|
return scan_sub(obj, re, 0, 1, 0);
|
371
394
|
}
|
372
395
|
|
396
|
+
static VALUE check_until(VALUE obj, VALUE re)
|
397
|
+
{
|
398
|
+
return scan_sub(obj, re, 0, 0, 0);
|
399
|
+
}
|
400
|
+
|
373
401
|
static VALUE skip(VALUE obj, VALUE re)
|
374
402
|
{
|
375
403
|
return scan_sub(obj, re, 1, 1, 1);
|
376
404
|
}
|
377
405
|
|
406
|
+
static VALUE skip_until(VALUE obj, VALUE re)
|
407
|
+
{
|
408
|
+
return scan_sub(obj, re, 1, 0, 1);
|
409
|
+
}
|
410
|
+
|
378
411
|
static VALUE match_p(VALUE obj, VALUE re)
|
379
412
|
{
|
380
413
|
return scan_sub(obj, re, 0, 1, 1);
|
381
414
|
}
|
382
415
|
|
416
|
+
static VALUE exist_p(VALUE obj, VALUE re)
|
417
|
+
{
|
418
|
+
return scan_sub(obj, re, 0, 0, 1);
|
419
|
+
}
|
420
|
+
|
383
421
|
static VALUE peek(VALUE obj, VALUE size)
|
384
422
|
{
|
385
423
|
mmapscanner_t *ms;
|
386
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
387
424
|
size_t sz = NUM2SIZET(size);
|
425
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
388
426
|
if (sz > ms->size - ms->pos)
|
389
427
|
sz = ms->size - ms->pos;
|
390
428
|
return create_from_mmapscanner(obj, ms->pos, sz);
|
@@ -404,7 +442,7 @@ static VALUE rest(VALUE obj)
|
|
404
442
|
return create_from_mmapscanner(obj, ms->pos, ms->size - ms->pos);
|
405
443
|
}
|
406
444
|
|
407
|
-
static int matched_sub(int argc, VALUE *argv, mmapscanner_t *ms, int *pos, int *len)
|
445
|
+
static int matched_sub(int argc, VALUE *argv, mmapscanner_t *ms, size_t *pos, size_t *len)
|
408
446
|
{
|
409
447
|
int i = 0;
|
410
448
|
if (ms->matched == 0)
|
@@ -429,8 +467,8 @@ static int matched_sub(int argc, VALUE *argv, mmapscanner_t *ms, int *pos, int *
|
|
429
467
|
static VALUE matched(int argc, VALUE *argv, VALUE obj)
|
430
468
|
{
|
431
469
|
mmapscanner_t *ms;
|
432
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
433
470
|
size_t pos, len;
|
471
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
434
472
|
if (matched_sub(argc, argv, ms, &pos, &len) == 0)
|
435
473
|
return Qnil;
|
436
474
|
return create_from_mmapscanner(obj, pos, len);
|
@@ -439,9 +477,9 @@ static VALUE matched(int argc, VALUE *argv, VALUE obj)
|
|
439
477
|
static VALUE matched_str(int argc, VALUE *argv, VALUE obj)
|
440
478
|
{
|
441
479
|
mmapscanner_t *ms;
|
442
|
-
Data_Get_Struct(obj, mmapscanner_t, ms);
|
443
480
|
mmap_data_t *mdata;
|
444
481
|
size_t pos, len;
|
482
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
445
483
|
if (matched_sub(argc, argv, ms, &pos, &len) == 0)
|
446
484
|
return Qnil;
|
447
485
|
if (TYPE(ms->data) == T_STRING)
|
@@ -452,6 +490,14 @@ static VALUE matched_str(int argc, VALUE *argv, VALUE obj)
|
|
452
490
|
return rb_str_new(mdata->ptr+ms->offset+pos, len);
|
453
491
|
}
|
454
492
|
|
493
|
+
static VALUE terminate(VALUE obj)
|
494
|
+
{
|
495
|
+
mmapscanner_t *ms;
|
496
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
497
|
+
ms->pos = ms->size;
|
498
|
+
return obj;
|
499
|
+
}
|
500
|
+
|
455
501
|
void Init_mmapscanner(void)
|
456
502
|
{
|
457
503
|
cMmapScanner = rb_define_class("MmapScanner", rb_cObject);
|
@@ -466,16 +512,22 @@ void Init_mmapscanner(void)
|
|
466
512
|
rb_define_method(cMmapScanner, "inspect", inspect, 0);
|
467
513
|
rb_define_method(cMmapScanner, "pos", pos, 0);
|
468
514
|
rb_define_method(cMmapScanner, "pos=", set_pos, 1);
|
515
|
+
rb_define_method(cMmapScanner, "scan_full", scan_full, 3);
|
516
|
+
rb_define_method(cMmapScanner, "search_full", search_full, 3);
|
469
517
|
rb_define_method(cMmapScanner, "scan", scan, 1);
|
470
518
|
rb_define_method(cMmapScanner, "scan_until", scan_until, 1);
|
471
519
|
rb_define_method(cMmapScanner, "check", check, 1);
|
520
|
+
rb_define_method(cMmapScanner, "check_until", check_until, 1);
|
472
521
|
rb_define_method(cMmapScanner, "skip", skip, 1);
|
522
|
+
rb_define_method(cMmapScanner, "skip_until", skip_until, 1);
|
473
523
|
rb_define_method(cMmapScanner, "match?", match_p, 1);
|
524
|
+
rb_define_method(cMmapScanner, "exist?", exist_p, 1);
|
474
525
|
rb_define_method(cMmapScanner, "peek", peek, 1);
|
475
526
|
rb_define_method(cMmapScanner, "eos?", eos_p, 0);
|
476
527
|
rb_define_method(cMmapScanner, "rest", rest, 0);
|
477
528
|
rb_define_method(cMmapScanner, "matched", matched, -1);
|
478
529
|
rb_define_method(cMmapScanner, "matched_str", matched_str, -1);
|
530
|
+
rb_define_method(cMmapScanner, "terminate", terminate, 0);
|
479
531
|
|
480
532
|
cMmap = rb_define_class_under(cMmapScanner, "Mmap", rb_cObject);
|
481
533
|
rb_define_alloc_func(cMmap, mmap_allocate);
|
data/spec/mmapscanner_spec.rb
CHANGED
@@ -105,6 +105,20 @@ describe MmapScanner do
|
|
105
105
|
subject.pos.should == 0
|
106
106
|
end
|
107
107
|
end
|
108
|
+
describe '#check_until' do
|
109
|
+
it 'returns matched data as MmapScanner' do
|
110
|
+
ret = subject.check_until(/123/)
|
111
|
+
ret.class.should == MmapScanner
|
112
|
+
ret.to_s.should == '0123'
|
113
|
+
end
|
114
|
+
it 'returns nil if not matched' do
|
115
|
+
subject.check_until(/abc/).should be_nil
|
116
|
+
end
|
117
|
+
it 'do not forward current position' do
|
118
|
+
ret = subject.check_until(/123/)
|
119
|
+
subject.pos.should == 0
|
120
|
+
end
|
121
|
+
end
|
108
122
|
describe '#skip' do
|
109
123
|
it 'returns length of matched data' do
|
110
124
|
subject.skip(/\d{10}/).should == 10
|
@@ -117,6 +131,18 @@ describe MmapScanner do
|
|
117
131
|
subject.pos.should == 10
|
118
132
|
end
|
119
133
|
end
|
134
|
+
describe '#skip_until' do
|
135
|
+
it 'returns length of matched data' do
|
136
|
+
subject.skip_until(/123/).should == 4
|
137
|
+
end
|
138
|
+
it 'returns nil if not matched' do
|
139
|
+
subject.skip_until(/abc/).should be_nil
|
140
|
+
end
|
141
|
+
it 'forward current position' do
|
142
|
+
subject.skip_until(/123/)
|
143
|
+
subject.pos.should == 4
|
144
|
+
end
|
145
|
+
end
|
120
146
|
describe '#match?' do
|
121
147
|
it 'returns length of matched data' do
|
122
148
|
subject.match?(/\d{10}/).should == 10
|
@@ -129,6 +155,18 @@ describe MmapScanner do
|
|
129
155
|
subject.pos.should == 0
|
130
156
|
end
|
131
157
|
end
|
158
|
+
describe '#exist?' do
|
159
|
+
it 'returns length of matched data' do
|
160
|
+
subject.exist?(/123/).should == 4
|
161
|
+
end
|
162
|
+
it 'returns nil if not matched' do
|
163
|
+
subject.exist?(/abc/).should be_nil
|
164
|
+
end
|
165
|
+
it 'do not forward current position' do
|
166
|
+
subject.exist?(/123/)
|
167
|
+
subject.pos.should == 0
|
168
|
+
end
|
169
|
+
end
|
132
170
|
describe '#matched' do
|
133
171
|
it 'returns matched data after scan' do
|
134
172
|
subject.scan(/\d{6}/)
|
@@ -173,6 +211,64 @@ describe MmapScanner do
|
|
173
211
|
subject.eos?.should == false
|
174
212
|
end
|
175
213
|
end
|
214
|
+
describe '#scan_full(re, true, true)' do
|
215
|
+
it 'is same as #scan' do
|
216
|
+
ret = subject.scan_full(/\d{10}/, true, true)
|
217
|
+
ret.class.should == MmapScanner
|
218
|
+
ret.to_s.should == '0123456789'
|
219
|
+
subject.pos.should == 10
|
220
|
+
end
|
221
|
+
end
|
222
|
+
describe '#scan_full(re, true, false)' do
|
223
|
+
it 'is same as #skip' do
|
224
|
+
ret = subject.scan_full(/\d{10}/, true, false)
|
225
|
+
ret.should == 10
|
226
|
+
subject.pos.should == 10
|
227
|
+
end
|
228
|
+
end
|
229
|
+
describe '#scan_full(re, false, true)' do
|
230
|
+
it 'is same as #check' do
|
231
|
+
ret = subject.scan_full(/\d{10}/, false, true)
|
232
|
+
ret.to_s.should == '0123456789'
|
233
|
+
subject.pos.should == 0
|
234
|
+
end
|
235
|
+
end
|
236
|
+
describe '#scan_full(re, false, false)' do
|
237
|
+
it 'is same as #match?' do
|
238
|
+
ret = subject.scan_full(/\d{10}/, false, false)
|
239
|
+
ret.should == 10
|
240
|
+
subject.pos.should == 0
|
241
|
+
end
|
242
|
+
end
|
243
|
+
describe '#search_full(re, true, true)' do
|
244
|
+
it 'is same as #scan_until' do
|
245
|
+
ret = subject.search_full(/789/, true, true)
|
246
|
+
ret.class.should == MmapScanner
|
247
|
+
ret.to_s.should == '0123456789'
|
248
|
+
subject.pos.should == 10
|
249
|
+
end
|
250
|
+
end
|
251
|
+
describe '#search_full(re, true, false)' do
|
252
|
+
it 'is same as #skip_until' do
|
253
|
+
ret = subject.search_full(/789/, true, false)
|
254
|
+
ret.should == 10
|
255
|
+
subject.pos.should == 10
|
256
|
+
end
|
257
|
+
end
|
258
|
+
describe '#search_full(re, false, true)' do
|
259
|
+
it 'is same as #check_until' do
|
260
|
+
ret = subject.search_full(/789/, false, true)
|
261
|
+
ret.to_s.should == '0123456789'
|
262
|
+
subject.pos.should == 0
|
263
|
+
end
|
264
|
+
end
|
265
|
+
describe '#search_full(re, false, false)' do
|
266
|
+
it 'is same as #exist?' do
|
267
|
+
ret = subject.search_full(/789/, false, false)
|
268
|
+
ret.should == 10
|
269
|
+
subject.pos.should == 0
|
270
|
+
end
|
271
|
+
end
|
176
272
|
describe '#rest' do
|
177
273
|
it 'returns rest data as MmapScanner' do
|
178
274
|
subject.pos = 9997
|
@@ -185,6 +281,12 @@ describe MmapScanner do
|
|
185
281
|
subject.rest.to_s.should == ''
|
186
282
|
end
|
187
283
|
end
|
284
|
+
describe '#terminate' do
|
285
|
+
it 'set position to end of MmapScanner area' do
|
286
|
+
subject.terminate.should == subject
|
287
|
+
subject.pos.should == 10000
|
288
|
+
end
|
289
|
+
end
|
188
290
|
describe '.new with position' do
|
189
291
|
it '#size is length of rest data' do
|
190
292
|
if src.respond_to? :size
|
@@ -220,6 +322,21 @@ describe MmapScanner do
|
|
220
322
|
subject.data.should be_kind_of MmapScanner::Mmap
|
221
323
|
end
|
222
324
|
end
|
325
|
+
context 'empty file' do
|
326
|
+
before do
|
327
|
+
tmpf = Tempfile.new 'mmapscanner_empty_file'
|
328
|
+
@file = File.open(tmpf.path)
|
329
|
+
end
|
330
|
+
it '#size returns 0' do
|
331
|
+
subject.size.should == 0
|
332
|
+
end
|
333
|
+
it '#to_s returns empty String' do
|
334
|
+
subject.to_s.should == ''
|
335
|
+
end
|
336
|
+
it '#eos? returns true' do
|
337
|
+
subject.eos?.should == true
|
338
|
+
end
|
339
|
+
end
|
223
340
|
end
|
224
341
|
|
225
342
|
context 'with String' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mmapscanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.3.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-07-19 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description:
|
15
15
|
email: tommy@tmtm.org
|
@@ -43,9 +43,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
43
43
|
version: '0'
|
44
44
|
requirements: []
|
45
45
|
rubyforge_project:
|
46
|
-
rubygems_version: 1.8.
|
46
|
+
rubygems_version: 1.8.23
|
47
47
|
signing_key:
|
48
48
|
specification_version: 3
|
49
49
|
summary: MmapScanner like StringScanner but it use mmap(2)-ed data
|
50
50
|
test_files:
|
51
51
|
- spec/mmapscanner_spec.rb
|
52
|
+
has_rdoc:
|