mmapscanner 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -1
- data/ext/mmapscanner.c +124 -45
- data/spec/mmapscanner_spec.rb +57 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -43,13 +43,18 @@ Usage
|
|
43
43
|
* size, length は mmap(2) したサイズを返します。
|
44
44
|
* to_s は mmap(2) した領域を String で返します。Encoding は常に ASCII-8BIT です。
|
45
45
|
* slice は mmap(2) した領域の一部を新たな MmapScanner オブジェクトで返します。
|
46
|
-
* scan
|
46
|
+
* scan はポインタ位置で正規表現との一致を試みます。一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
|
47
|
+
* scan_until は scan と同じですが、現在のポインタの位置以降で一致を試みます。
|
47
48
|
* check は scan と同じですが、ポインタを進めません。
|
48
49
|
* skip は scan と同じですが、一致したバイト数を返します。
|
49
50
|
* match? は check と同じですが、一致したバイト数を返します。
|
50
51
|
* peek は指定したバイト数分のデータを返します。ポインタは進みません。
|
51
52
|
* eos? はポインタが末尾に達していると true を返します。
|
52
53
|
* rest はポインタ以降のデータを返します。
|
54
|
+
* matched は正規表現に一致した部分を MmapScanner オブジェクトで返します。
|
55
|
+
* matched(n) は正規表現の n番目の括弧に一致した部分を MmapScanner オブジェクトで返します。
|
56
|
+
* pos は現在のポインタの位置を返します。
|
57
|
+
* pos= でポインタ位置を変更することができます。
|
53
58
|
|
54
59
|
Copyright
|
55
60
|
---------
|
data/ext/mmapscanner.c
CHANGED
@@ -13,6 +13,15 @@ typedef struct {
|
|
13
13
|
size_t size;
|
14
14
|
} mmap_data_t;
|
15
15
|
|
16
|
+
typedef struct {
|
17
|
+
size_t offset;
|
18
|
+
size_t size;
|
19
|
+
size_t pos;
|
20
|
+
int matched;
|
21
|
+
size_t matched_pos;
|
22
|
+
struct re_registers regs;
|
23
|
+
} mmapscanner_t;
|
24
|
+
|
16
25
|
static void mmap_free(mmap_data_t *data)
|
17
26
|
{
|
18
27
|
if (data->ptr)
|
@@ -33,12 +42,31 @@ static VALUE create_mmap_object(int fd, size_t offset, size_t size)
|
|
33
42
|
return Data_Wrap_Struct(cMmap, 0, mmap_free, data);
|
34
43
|
}
|
35
44
|
|
45
|
+
static void mmapscanner_free(mmapscanner_t *ms)
|
46
|
+
{
|
47
|
+
onig_region_free(&ms->regs, 0);
|
48
|
+
free(ms);
|
49
|
+
}
|
50
|
+
|
51
|
+
VALUE allocate(VALUE klass)
|
52
|
+
{
|
53
|
+
mmapscanner_t *ms;
|
54
|
+
ms = malloc(sizeof *ms);
|
55
|
+
ms->offset = 0;
|
56
|
+
ms->size = 0;
|
57
|
+
ms->pos = 0;
|
58
|
+
ms->matched_pos = 0;
|
59
|
+
onig_region_init(&ms->regs);
|
60
|
+
return Data_Wrap_Struct(klass, 0, mmapscanner_free, ms);
|
61
|
+
}
|
62
|
+
|
36
63
|
static VALUE initialize(int argc, VALUE *argv, VALUE obj)
|
37
64
|
{
|
38
65
|
VALUE src, voffset, vsize;
|
39
66
|
size_t offset, size;
|
40
67
|
size_t src_offset, src_size;
|
41
68
|
VALUE src_data;
|
69
|
+
mmapscanner_t *ms;
|
42
70
|
|
43
71
|
rb_scan_args(argc, argv, "12", &src, &voffset, &vsize);
|
44
72
|
if (voffset != Qnil && NUM2LL(voffset) < 0)
|
@@ -47,8 +75,9 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
|
|
47
75
|
rb_raise(rb_eRangeError, "length out of range: %lld", NUM2LL(vsize));
|
48
76
|
offset = voffset == Qnil ? 0 : NUM2SIZET(voffset);
|
49
77
|
if (rb_obj_class(src) == cMmapScanner) {
|
50
|
-
|
51
|
-
|
78
|
+
Data_Get_Struct(src, mmapscanner_t, ms);
|
79
|
+
src_offset = ms->offset;
|
80
|
+
src_size = ms->size;
|
52
81
|
src_data = rb_iv_get(src, "data");
|
53
82
|
} else if (TYPE(src) == T_FILE) {
|
54
83
|
int fd;
|
@@ -68,27 +97,35 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
|
|
68
97
|
} else {
|
69
98
|
rb_raise(rb_eTypeError, "wrong argument type %s (expected File/String/MmapScanner)", rb_obj_classname(src));
|
70
99
|
}
|
71
|
-
if (offset
|
72
|
-
rb_raise(rb_eRangeError, "length out of range: %zu
|
100
|
+
if (offset > src_size)
|
101
|
+
rb_raise(rb_eRangeError, "length out of range: %zu > %zu", offset, src_size);
|
73
102
|
size = vsize == Qnil ? src_size - offset : NUM2SIZET(vsize);
|
74
103
|
if (size > src_size - offset)
|
75
104
|
size = src_size - offset;
|
76
|
-
|
77
|
-
|
105
|
+
|
106
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
107
|
+
ms->offset = src_offset + offset;
|
108
|
+
ms->size = size;
|
109
|
+
ms->pos = 0;
|
110
|
+
ms->matched = 0;
|
111
|
+
ms->matched_pos = 0;
|
78
112
|
rb_iv_set(obj, "data", src_data);
|
79
|
-
rb_iv_set(obj, "pos", INT2NUM(0));
|
80
113
|
return Qnil;
|
81
114
|
}
|
82
115
|
|
83
116
|
static VALUE size(VALUE obj)
|
84
117
|
{
|
85
|
-
|
118
|
+
mmapscanner_t *ms;
|
119
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
120
|
+
return SIZET2NUM(ms->size);
|
86
121
|
}
|
87
122
|
|
88
123
|
static VALUE to_s(VALUE obj)
|
89
124
|
{
|
90
|
-
|
91
|
-
|
125
|
+
mmapscanner_t *ms;
|
126
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
127
|
+
size_t offset = ms->offset;
|
128
|
+
size_t size = ms->size;
|
92
129
|
VALUE data = rb_iv_get(obj, "data");
|
93
130
|
mmap_data_t *mdata;
|
94
131
|
|
@@ -110,30 +147,35 @@ static VALUE inspect(VALUE obj)
|
|
110
147
|
|
111
148
|
static VALUE pos(VALUE obj)
|
112
149
|
{
|
113
|
-
|
150
|
+
mmapscanner_t *ms;
|
151
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
152
|
+
return SIZET2NUM(ms->pos);
|
114
153
|
}
|
115
154
|
|
116
155
|
static VALUE set_pos(VALUE obj, VALUE pos)
|
117
156
|
{
|
157
|
+
mmapscanner_t *ms;
|
158
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
118
159
|
size_t p, size;
|
119
160
|
|
120
161
|
if (NUM2LL(pos) < 0)
|
121
162
|
rb_raise(rb_eRangeError, "out of range: %lld", NUM2LL(pos));
|
122
163
|
p = NUM2SIZET(pos);
|
123
|
-
size =
|
164
|
+
size = ms->size;
|
124
165
|
if (p > size)
|
125
166
|
rb_raise(rb_eRangeError, "out of range: %zu > %zu", p, size);
|
126
|
-
|
167
|
+
ms->pos = p;
|
127
168
|
return pos;
|
128
169
|
}
|
129
170
|
|
130
|
-
static VALUE scan_sub(VALUE obj, VALUE re, int forward)
|
171
|
+
static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeonly)
|
131
172
|
{
|
173
|
+
mmapscanner_t *ms;
|
174
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
132
175
|
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
133
176
|
regex_t *reg;
|
134
177
|
int tmpreg;
|
135
178
|
int result;
|
136
|
-
struct re_registers regs;
|
137
179
|
size_t old_pos, matched_len;
|
138
180
|
char *ptr;
|
139
181
|
size_t pos, size;
|
@@ -141,8 +183,8 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
|
|
141
183
|
mmap_data_t *mdata;
|
142
184
|
|
143
185
|
Check_Type(re, T_REGEXP);
|
144
|
-
pos =
|
145
|
-
size =
|
186
|
+
pos = ms->pos;
|
187
|
+
size = ms->size;
|
146
188
|
if (pos >= size)
|
147
189
|
return Qnil;
|
148
190
|
data = rb_iv_get(obj, "data");
|
@@ -152,17 +194,24 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
|
|
152
194
|
Data_Get_Struct(data, mmap_data_t, mdata);
|
153
195
|
ptr = mdata->ptr;
|
154
196
|
}
|
155
|
-
ptr +=
|
197
|
+
ptr += ms->offset;
|
156
198
|
|
157
199
|
reg = rb_reg_prepare_re(re, rb_str_new("", 0));
|
158
200
|
tmpreg = reg != RREGEXP(re)->ptr;
|
159
201
|
if (!tmpreg) RREGEXP(re)->usecnt++;
|
160
202
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
203
|
+
if (headonly) {
|
204
|
+
result = onig_match(reg, (UChar*)(ptr+pos),
|
205
|
+
(UChar*)(ptr+size),
|
206
|
+
(UChar*)(ptr+pos),
|
207
|
+
&ms->regs, ONIG_OPTION_NONE);
|
208
|
+
} else {
|
209
|
+
result = onig_search(reg, (UChar*)(ptr+pos),
|
210
|
+
(UChar*)(ptr+size),
|
211
|
+
(UChar*)(ptr+pos),
|
212
|
+
(UChar*)(ptr+size),
|
213
|
+
&ms->regs, ONIG_OPTION_NONE);
|
214
|
+
}
|
166
215
|
if (!tmpreg) RREGEXP(re)->usecnt--;
|
167
216
|
if (tmpreg) {
|
168
217
|
if (RREGEXP(re)->usecnt) {
|
@@ -175,65 +224,93 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
|
|
175
224
|
if (result < 0)
|
176
225
|
return Qnil;
|
177
226
|
old_pos = pos;
|
178
|
-
matched_len = regs.end[0];
|
227
|
+
matched_len = ms->regs.end[0];
|
179
228
|
if (forward) {
|
180
229
|
pos += matched_len;
|
181
|
-
|
230
|
+
ms->pos = pos;
|
182
231
|
}
|
183
|
-
|
232
|
+
ms->matched = 1;
|
233
|
+
ms->matched_pos = old_pos;
|
234
|
+
|
235
|
+
if (sizeonly)
|
236
|
+
return SIZET2NUM(matched_len);
|
237
|
+
return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(old_pos), SIZET2NUM(matched_len));
|
184
238
|
}
|
185
239
|
|
186
240
|
static VALUE scan(VALUE obj, VALUE re)
|
187
241
|
{
|
188
|
-
return scan_sub(obj, re, 1);
|
242
|
+
return scan_sub(obj, re, 1, 1, 0);
|
243
|
+
}
|
244
|
+
|
245
|
+
static VALUE scan_until(VALUE obj, VALUE re)
|
246
|
+
{
|
247
|
+
return scan_sub(obj, re, 1, 0, 0);
|
189
248
|
}
|
190
249
|
|
191
250
|
static VALUE check(VALUE obj, VALUE re)
|
192
251
|
{
|
193
|
-
return scan_sub(obj, re, 0);
|
252
|
+
return scan_sub(obj, re, 0, 1, 0);
|
194
253
|
}
|
195
254
|
|
196
255
|
static VALUE skip(VALUE obj, VALUE re)
|
197
256
|
{
|
198
|
-
|
199
|
-
if (ret == Qnil)
|
200
|
-
return ret;
|
201
|
-
return rb_iv_get(ret, "size");
|
257
|
+
return scan_sub(obj, re, 1, 1, 1);
|
202
258
|
}
|
203
259
|
|
204
260
|
static VALUE match_p(VALUE obj, VALUE re)
|
205
261
|
{
|
206
|
-
|
207
|
-
if (ret == Qnil)
|
208
|
-
return ret;
|
209
|
-
return rb_iv_get(ret, "size");
|
262
|
+
return scan_sub(obj, re, 0, 1, 1);
|
210
263
|
}
|
211
264
|
|
212
265
|
static VALUE peek(VALUE obj, VALUE size)
|
213
266
|
{
|
267
|
+
mmapscanner_t *ms;
|
268
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
214
269
|
size_t sz = NUM2SIZET(size);
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
sz = data_size - data_pos;
|
219
|
-
return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(data_pos), SIZET2NUM(sz));
|
270
|
+
if (sz > ms->size - ms->pos)
|
271
|
+
sz = ms->size - ms->pos;
|
272
|
+
return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(ms->pos), SIZET2NUM(sz));
|
220
273
|
}
|
221
274
|
|
222
275
|
static VALUE eos_p(VALUE obj)
|
223
276
|
{
|
224
|
-
|
225
|
-
|
226
|
-
return
|
277
|
+
mmapscanner_t *ms;
|
278
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
279
|
+
return ms->pos >= ms->size ? Qtrue : Qfalse;
|
227
280
|
}
|
228
281
|
|
229
282
|
static VALUE rest(VALUE obj)
|
230
283
|
{
|
231
|
-
|
284
|
+
mmapscanner_t *ms;
|
285
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
286
|
+
return rb_funcall(cMmapScanner, rb_intern("new"), 2, obj, SIZET2NUM(ms->pos));
|
287
|
+
}
|
288
|
+
|
289
|
+
static VALUE matched(int argc, VALUE *argv, VALUE obj)
|
290
|
+
{
|
291
|
+
mmapscanner_t *ms;
|
292
|
+
Data_Get_Struct(obj, mmapscanner_t, ms);
|
293
|
+
VALUE nth;
|
294
|
+
int i = 0;
|
295
|
+
size_t pos, len;
|
296
|
+
|
297
|
+
if (rb_scan_args(argc, argv, "01", &nth) == 1)
|
298
|
+
i = NUM2LONG(nth);
|
299
|
+
if (ms->matched == 0)
|
300
|
+
return Qnil;
|
301
|
+
if (i < 0)
|
302
|
+
return Qnil;
|
303
|
+
if (i >= ms->regs.num_regs)
|
304
|
+
return Qnil;
|
305
|
+
pos = ms->matched_pos + ms->regs.beg[i];
|
306
|
+
len = ms->regs.end[i] - ms->regs.beg[i];
|
307
|
+
return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(pos), SIZET2NUM(len));
|
232
308
|
}
|
233
309
|
|
234
310
|
void Init_mmapscanner(void)
|
235
311
|
{
|
236
312
|
cMmapScanner = rb_define_class("MmapScanner", rb_cObject);
|
313
|
+
rb_define_alloc_func(cMmapScanner, allocate);
|
237
314
|
rb_define_method(cMmapScanner, "initialize", initialize, -1);
|
238
315
|
rb_define_method(cMmapScanner, "size", size, 0);
|
239
316
|
rb_define_method(cMmapScanner, "length", size, 0);
|
@@ -244,12 +321,14 @@ void Init_mmapscanner(void)
|
|
244
321
|
rb_define_method(cMmapScanner, "pos", pos, 0);
|
245
322
|
rb_define_method(cMmapScanner, "pos=", set_pos, 1);
|
246
323
|
rb_define_method(cMmapScanner, "scan", scan, 1);
|
324
|
+
rb_define_method(cMmapScanner, "scan_until", scan_until, 1);
|
247
325
|
rb_define_method(cMmapScanner, "check", check, 1);
|
248
326
|
rb_define_method(cMmapScanner, "skip", skip, 1);
|
249
327
|
rb_define_method(cMmapScanner, "match?", match_p, 1);
|
250
328
|
rb_define_method(cMmapScanner, "peek", peek, 1);
|
251
329
|
rb_define_method(cMmapScanner, "eos?", eos_p, 0);
|
252
330
|
rb_define_method(cMmapScanner, "rest", rest, 0);
|
331
|
+
rb_define_method(cMmapScanner, "matched", matched, -1);
|
253
332
|
|
254
333
|
cMmap = rb_define_class_under(cMmapScanner, "Mmap", rb_cObject);
|
255
334
|
}
|
data/spec/mmapscanner_spec.rb
CHANGED
@@ -51,6 +51,21 @@ describe MmapScanner do
|
|
51
51
|
subject.pos.should == 10
|
52
52
|
end
|
53
53
|
end
|
54
|
+
describe '#scan_until' do
|
55
|
+
it 'returns matched data as MmapScanner' do
|
56
|
+
subject.scan(/012/)
|
57
|
+
ret = subject.scan_until(/678/)
|
58
|
+
ret.class.should == MmapScanner
|
59
|
+
ret.to_s.should == '345678'
|
60
|
+
end
|
61
|
+
it 'returns nil if not matched' do
|
62
|
+
subject.scan_until(/321/).should be_nil
|
63
|
+
end
|
64
|
+
it 'forward current position' do
|
65
|
+
subject.scan_until(/456/)
|
66
|
+
subject.pos.should == 7
|
67
|
+
end
|
68
|
+
end
|
54
69
|
describe '#check' do
|
55
70
|
it 'returns matched data as MmapScanner' do
|
56
71
|
ret = subject.check(/\d{10}/)
|
@@ -89,6 +104,30 @@ describe MmapScanner do
|
|
89
104
|
subject.pos.should == 0
|
90
105
|
end
|
91
106
|
end
|
107
|
+
describe '#matched' do
|
108
|
+
it 'returns matched data after scan' do
|
109
|
+
subject.scan(/\d{6}/)
|
110
|
+
subject.matched.to_s.should == '012345'
|
111
|
+
end
|
112
|
+
it 'returns matched data after scan_until' do
|
113
|
+
subject.scan_until(/4567/)
|
114
|
+
subject.matched.to_s.should == '4567'
|
115
|
+
end
|
116
|
+
it 'returns nil if there is not matched data' do
|
117
|
+
subject.matched.should be_nil
|
118
|
+
end
|
119
|
+
end
|
120
|
+
describe '#matched(nth)' do
|
121
|
+
it 'returns nth part of matched string' do
|
122
|
+
subject.scan(/(..)(..)(..)/)
|
123
|
+
subject.matched(0).to_s.should == '012345'
|
124
|
+
subject.matched(1).to_s.should == '01'
|
125
|
+
subject.matched(2).to_s.should == '23'
|
126
|
+
subject.matched(3).to_s.should == '45'
|
127
|
+
subject.matched(4).should be_nil
|
128
|
+
subject.matched(-1).should be_nil
|
129
|
+
end
|
130
|
+
end
|
92
131
|
describe '#peek' do
|
93
132
|
it 'returns MmapScanner' do
|
94
133
|
subject.peek(10).should be_instance_of MmapScanner
|
@@ -115,6 +154,10 @@ describe MmapScanner do
|
|
115
154
|
ret.should be_instance_of MmapScanner
|
116
155
|
ret.to_s.should == '789'
|
117
156
|
end
|
157
|
+
it 'returns empty MmapScanner if it reached to end' do
|
158
|
+
subject.pos = 10000
|
159
|
+
subject.rest.to_s.should == ''
|
160
|
+
end
|
118
161
|
end
|
119
162
|
describe '.new with position' do
|
120
163
|
it '#size is length of rest data' do
|
@@ -152,6 +195,13 @@ describe MmapScanner do
|
|
152
195
|
let(:src){'0123456789'*1020}
|
153
196
|
subject{MmapScanner.new(src, 100, 10000)}
|
154
197
|
it_should_behave_like 'MmapScanner'
|
198
|
+
describe '.new with empty source' do
|
199
|
+
it 'returns empty MmapScanner' do
|
200
|
+
m = MmapScanner.new('')
|
201
|
+
m.size.should == 0
|
202
|
+
m.to_s.should be_empty
|
203
|
+
end
|
204
|
+
end
|
155
205
|
end
|
156
206
|
|
157
207
|
context 'with MmapScanner' do
|
@@ -163,5 +213,12 @@ describe MmapScanner do
|
|
163
213
|
let(:src){MmapScanner.new(@file)}
|
164
214
|
subject{MmapScanner.new(src, 100, 10000)}
|
165
215
|
it_should_behave_like 'MmapScanner'
|
216
|
+
describe '.new with empty source' do
|
217
|
+
it 'returns empty MmapScanner' do
|
218
|
+
m = MmapScanner.new(src, 1020, 0)
|
219
|
+
m.size.should == 0
|
220
|
+
m.to_s.should be_empty
|
221
|
+
end
|
222
|
+
end
|
166
223
|
end
|
167
224
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: mmapscanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: "0.2"
|
5
|
+
version: "0.3"
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- TOMITA Masahiro
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-03-
|
13
|
+
date: 2011-03-21 00:00:00 +09:00
|
14
14
|
default_executable:
|
15
15
|
dependencies: []
|
16
16
|
|