mmapscanner 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -43,13 +43,18 @@ Usage
43
43
  * size, length は mmap(2) したサイズを返します。
44
44
  * to_s は mmap(2) した領域を String で返します。Encoding は常に ASCII-8BIT です。
45
45
  * slice は mmap(2) した領域の一部を新たな MmapScanner オブジェクトで返します。
46
- * scan は正規表現に一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
46
+ * scan はポインタ位置で正規表現との一致を試みます。一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
47
+ * scan_until は scan と同じですが、現在のポインタの位置以降で一致を試みます。
47
48
  * check は scan と同じですが、ポインタを進めません。
48
49
  * skip は scan と同じですが、一致したバイト数を返します。
49
50
  * match? は check と同じですが、一致したバイト数を返します。
50
51
  * peek は指定したバイト数分のデータを返します。ポインタは進みません。
51
52
  * eos? はポインタが末尾に達していると true を返します。
52
53
  * rest はポインタ以降のデータを返します。
54
+ * matched は正規表現に一致した部分を MmapScanner オブジェクトで返します。
55
+ * matched(n) は正規表現の n番目の括弧に一致した部分を MmapScanner オブジェクトで返します。
56
+ * pos は現在のポインタの位置を返します。
57
+ * pos= でポインタ位置を変更することができます。
53
58
 
54
59
  Copyright
55
60
  ---------
data/ext/mmapscanner.c CHANGED
@@ -13,6 +13,15 @@ typedef struct {
13
13
  size_t size;
14
14
  } mmap_data_t;
15
15
 
16
+ typedef struct {
17
+ size_t offset;
18
+ size_t size;
19
+ size_t pos;
20
+ int matched;
21
+ size_t matched_pos;
22
+ struct re_registers regs;
23
+ } mmapscanner_t;
24
+
16
25
  static void mmap_free(mmap_data_t *data)
17
26
  {
18
27
  if (data->ptr)
@@ -33,12 +42,31 @@ static VALUE create_mmap_object(int fd, size_t offset, size_t size)
33
42
  return Data_Wrap_Struct(cMmap, 0, mmap_free, data);
34
43
  }
35
44
 
45
+ static void mmapscanner_free(mmapscanner_t *ms)
46
+ {
47
+ onig_region_free(&ms->regs, 0);
48
+ free(ms);
49
+ }
50
+
51
+ VALUE allocate(VALUE klass)
52
+ {
53
+ mmapscanner_t *ms;
54
+ ms = malloc(sizeof *ms);
55
+ ms->offset = 0;
56
+ ms->size = 0;
57
+ ms->pos = 0;
58
+ ms->matched_pos = 0;
59
+ onig_region_init(&ms->regs);
60
+ return Data_Wrap_Struct(klass, 0, mmapscanner_free, ms);
61
+ }
62
+
36
63
  static VALUE initialize(int argc, VALUE *argv, VALUE obj)
37
64
  {
38
65
  VALUE src, voffset, vsize;
39
66
  size_t offset, size;
40
67
  size_t src_offset, src_size;
41
68
  VALUE src_data;
69
+ mmapscanner_t *ms;
42
70
 
43
71
  rb_scan_args(argc, argv, "12", &src, &voffset, &vsize);
44
72
  if (voffset != Qnil && NUM2LL(voffset) < 0)
@@ -47,8 +75,9 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
47
75
  rb_raise(rb_eRangeError, "length out of range: %lld", NUM2LL(vsize));
48
76
  offset = voffset == Qnil ? 0 : NUM2SIZET(voffset);
49
77
  if (rb_obj_class(src) == cMmapScanner) {
50
- src_offset = NUM2SIZET(rb_iv_get(src, "offset"));
51
- src_size = NUM2SIZET(rb_iv_get(src, "size"));
78
+ Data_Get_Struct(src, mmapscanner_t, ms);
79
+ src_offset = ms->offset;
80
+ src_size = ms->size;
52
81
  src_data = rb_iv_get(src, "data");
53
82
  } else if (TYPE(src) == T_FILE) {
54
83
  int fd;
@@ -68,27 +97,35 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
68
97
  } else {
69
98
  rb_raise(rb_eTypeError, "wrong argument type %s (expected File/String/MmapScanner)", rb_obj_classname(src));
70
99
  }
71
- if (offset >= src_size)
72
- rb_raise(rb_eRangeError, "length out of range: %zu >= %zu", offset, src_size);
100
+ if (offset > src_size)
101
+ rb_raise(rb_eRangeError, "length out of range: %zu > %zu", offset, src_size);
73
102
  size = vsize == Qnil ? src_size - offset : NUM2SIZET(vsize);
74
103
  if (size > src_size - offset)
75
104
  size = src_size - offset;
76
- rb_iv_set(obj, "offset", SIZET2NUM(src_offset + offset));
77
- rb_iv_set(obj, "size", SIZET2NUM(size));
105
+
106
+ Data_Get_Struct(obj, mmapscanner_t, ms);
107
+ ms->offset = src_offset + offset;
108
+ ms->size = size;
109
+ ms->pos = 0;
110
+ ms->matched = 0;
111
+ ms->matched_pos = 0;
78
112
  rb_iv_set(obj, "data", src_data);
79
- rb_iv_set(obj, "pos", INT2NUM(0));
80
113
  return Qnil;
81
114
  }
82
115
 
83
116
  static VALUE size(VALUE obj)
84
117
  {
85
- return rb_iv_get(obj, "size");
118
+ mmapscanner_t *ms;
119
+ Data_Get_Struct(obj, mmapscanner_t, ms);
120
+ return SIZET2NUM(ms->size);
86
121
  }
87
122
 
88
123
  static VALUE to_s(VALUE obj)
89
124
  {
90
- size_t offset = NUM2SIZET(rb_iv_get(obj, "offset"));
91
- size_t size = NUM2SIZET(rb_iv_get(obj, "size"));
125
+ mmapscanner_t *ms;
126
+ Data_Get_Struct(obj, mmapscanner_t, ms);
127
+ size_t offset = ms->offset;
128
+ size_t size = ms->size;
92
129
  VALUE data = rb_iv_get(obj, "data");
93
130
  mmap_data_t *mdata;
94
131
 
@@ -110,30 +147,35 @@ static VALUE inspect(VALUE obj)
110
147
 
111
148
  static VALUE pos(VALUE obj)
112
149
  {
113
- return rb_iv_get(obj, "pos");
150
+ mmapscanner_t *ms;
151
+ Data_Get_Struct(obj, mmapscanner_t, ms);
152
+ return SIZET2NUM(ms->pos);
114
153
  }
115
154
 
116
155
  static VALUE set_pos(VALUE obj, VALUE pos)
117
156
  {
157
+ mmapscanner_t *ms;
158
+ Data_Get_Struct(obj, mmapscanner_t, ms);
118
159
  size_t p, size;
119
160
 
120
161
  if (NUM2LL(pos) < 0)
121
162
  rb_raise(rb_eRangeError, "out of range: %lld", NUM2LL(pos));
122
163
  p = NUM2SIZET(pos);
123
- size = NUM2SIZET(rb_iv_get(obj, "size"));
164
+ size = ms->size;
124
165
  if (p > size)
125
166
  rb_raise(rb_eRangeError, "out of range: %zu > %zu", p, size);
126
- rb_iv_set(obj, "pos", pos);
167
+ ms->pos = p;
127
168
  return pos;
128
169
  }
129
170
 
130
- static VALUE scan_sub(VALUE obj, VALUE re, int forward)
171
+ static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeonly)
131
172
  {
173
+ mmapscanner_t *ms;
174
+ Data_Get_Struct(obj, mmapscanner_t, ms);
132
175
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
133
176
  regex_t *reg;
134
177
  int tmpreg;
135
178
  int result;
136
- struct re_registers regs;
137
179
  size_t old_pos, matched_len;
138
180
  char *ptr;
139
181
  size_t pos, size;
@@ -141,8 +183,8 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
141
183
  mmap_data_t *mdata;
142
184
 
143
185
  Check_Type(re, T_REGEXP);
144
- pos = NUM2SIZET(rb_iv_get(obj, "pos"));
145
- size = NUM2SIZET(rb_iv_get(obj, "size"));
186
+ pos = ms->pos;
187
+ size = ms->size;
146
188
  if (pos >= size)
147
189
  return Qnil;
148
190
  data = rb_iv_get(obj, "data");
@@ -152,17 +194,24 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
152
194
  Data_Get_Struct(data, mmap_data_t, mdata);
153
195
  ptr = mdata->ptr;
154
196
  }
155
- ptr += NUM2SIZET(rb_iv_get(obj, "offset"));
197
+ ptr += ms->offset;
156
198
 
157
199
  reg = rb_reg_prepare_re(re, rb_str_new("", 0));
158
200
  tmpreg = reg != RREGEXP(re)->ptr;
159
201
  if (!tmpreg) RREGEXP(re)->usecnt++;
160
202
 
161
- onig_region_init(&regs);
162
- result = onig_match(reg, (UChar* )(ptr+pos),
163
- (UChar* )(ptr+size),
164
- (UChar* )(ptr+pos),
165
- &regs, ONIG_OPTION_NONE);
203
+ if (headonly) {
204
+ result = onig_match(reg, (UChar*)(ptr+pos),
205
+ (UChar*)(ptr+size),
206
+ (UChar*)(ptr+pos),
207
+ &ms->regs, ONIG_OPTION_NONE);
208
+ } else {
209
+ result = onig_search(reg, (UChar*)(ptr+pos),
210
+ (UChar*)(ptr+size),
211
+ (UChar*)(ptr+pos),
212
+ (UChar*)(ptr+size),
213
+ &ms->regs, ONIG_OPTION_NONE);
214
+ }
166
215
  if (!tmpreg) RREGEXP(re)->usecnt--;
167
216
  if (tmpreg) {
168
217
  if (RREGEXP(re)->usecnt) {
@@ -175,65 +224,93 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
175
224
  if (result < 0)
176
225
  return Qnil;
177
226
  old_pos = pos;
178
- matched_len = regs.end[0];
227
+ matched_len = ms->regs.end[0];
179
228
  if (forward) {
180
229
  pos += matched_len;
181
- rb_iv_set(obj, "pos", SIZET2NUM(pos));
230
+ ms->pos = pos;
182
231
  }
183
- return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, ULL2NUM(old_pos), ULL2NUM(matched_len));
232
+ ms->matched = 1;
233
+ ms->matched_pos = old_pos;
234
+
235
+ if (sizeonly)
236
+ return SIZET2NUM(matched_len);
237
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(old_pos), SIZET2NUM(matched_len));
184
238
  }
185
239
 
186
240
  static VALUE scan(VALUE obj, VALUE re)
187
241
  {
188
- return scan_sub(obj, re, 1);
242
+ return scan_sub(obj, re, 1, 1, 0);
243
+ }
244
+
245
+ static VALUE scan_until(VALUE obj, VALUE re)
246
+ {
247
+ return scan_sub(obj, re, 1, 0, 0);
189
248
  }
190
249
 
191
250
  static VALUE check(VALUE obj, VALUE re)
192
251
  {
193
- return scan_sub(obj, re, 0);
252
+ return scan_sub(obj, re, 0, 1, 0);
194
253
  }
195
254
 
196
255
  static VALUE skip(VALUE obj, VALUE re)
197
256
  {
198
- VALUE ret = scan_sub(obj, re, 1);
199
- if (ret == Qnil)
200
- return ret;
201
- return rb_iv_get(ret, "size");
257
+ return scan_sub(obj, re, 1, 1, 1);
202
258
  }
203
259
 
204
260
  static VALUE match_p(VALUE obj, VALUE re)
205
261
  {
206
- VALUE ret = scan_sub(obj, re, 0);
207
- if (ret == Qnil)
208
- return ret;
209
- return rb_iv_get(ret, "size");
262
+ return scan_sub(obj, re, 0, 1, 1);
210
263
  }
211
264
 
212
265
  static VALUE peek(VALUE obj, VALUE size)
213
266
  {
267
+ mmapscanner_t *ms;
268
+ Data_Get_Struct(obj, mmapscanner_t, ms);
214
269
  size_t sz = NUM2SIZET(size);
215
- size_t data_pos = NUM2SIZET(rb_iv_get(obj, "pos"));
216
- size_t data_size = NUM2SIZET(rb_iv_get(obj, "size"));
217
- if (sz > data_size - data_pos)
218
- sz = data_size - data_pos;
219
- return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(data_pos), SIZET2NUM(sz));
270
+ if (sz > ms->size - ms->pos)
271
+ sz = ms->size - ms->pos;
272
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(ms->pos), SIZET2NUM(sz));
220
273
  }
221
274
 
222
275
  static VALUE eos_p(VALUE obj)
223
276
  {
224
- size_t data_pos = NUM2SIZET(rb_iv_get(obj, "pos"));
225
- size_t data_size = NUM2SIZET(rb_iv_get(obj, "size"));
226
- return data_pos >= data_size ? Qtrue : Qfalse;
277
+ mmapscanner_t *ms;
278
+ Data_Get_Struct(obj, mmapscanner_t, ms);
279
+ return ms->pos >= ms->size ? Qtrue : Qfalse;
227
280
  }
228
281
 
229
282
  static VALUE rest(VALUE obj)
230
283
  {
231
- return rb_funcall(cMmapScanner, rb_intern("new"), 2, obj, rb_iv_get(obj, "pos"));
284
+ mmapscanner_t *ms;
285
+ Data_Get_Struct(obj, mmapscanner_t, ms);
286
+ return rb_funcall(cMmapScanner, rb_intern("new"), 2, obj, SIZET2NUM(ms->pos));
287
+ }
288
+
289
+ static VALUE matched(int argc, VALUE *argv, VALUE obj)
290
+ {
291
+ mmapscanner_t *ms;
292
+ Data_Get_Struct(obj, mmapscanner_t, ms);
293
+ VALUE nth;
294
+ int i = 0;
295
+ size_t pos, len;
296
+
297
+ if (rb_scan_args(argc, argv, "01", &nth) == 1)
298
+ i = NUM2LONG(nth);
299
+ if (ms->matched == 0)
300
+ return Qnil;
301
+ if (i < 0)
302
+ return Qnil;
303
+ if (i >= ms->regs.num_regs)
304
+ return Qnil;
305
+ pos = ms->matched_pos + ms->regs.beg[i];
306
+ len = ms->regs.end[i] - ms->regs.beg[i];
307
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(pos), SIZET2NUM(len));
232
308
  }
233
309
 
234
310
  void Init_mmapscanner(void)
235
311
  {
236
312
  cMmapScanner = rb_define_class("MmapScanner", rb_cObject);
313
+ rb_define_alloc_func(cMmapScanner, allocate);
237
314
  rb_define_method(cMmapScanner, "initialize", initialize, -1);
238
315
  rb_define_method(cMmapScanner, "size", size, 0);
239
316
  rb_define_method(cMmapScanner, "length", size, 0);
@@ -244,12 +321,14 @@ void Init_mmapscanner(void)
244
321
  rb_define_method(cMmapScanner, "pos", pos, 0);
245
322
  rb_define_method(cMmapScanner, "pos=", set_pos, 1);
246
323
  rb_define_method(cMmapScanner, "scan", scan, 1);
324
+ rb_define_method(cMmapScanner, "scan_until", scan_until, 1);
247
325
  rb_define_method(cMmapScanner, "check", check, 1);
248
326
  rb_define_method(cMmapScanner, "skip", skip, 1);
249
327
  rb_define_method(cMmapScanner, "match?", match_p, 1);
250
328
  rb_define_method(cMmapScanner, "peek", peek, 1);
251
329
  rb_define_method(cMmapScanner, "eos?", eos_p, 0);
252
330
  rb_define_method(cMmapScanner, "rest", rest, 0);
331
+ rb_define_method(cMmapScanner, "matched", matched, -1);
253
332
 
254
333
  cMmap = rb_define_class_under(cMmapScanner, "Mmap", rb_cObject);
255
334
  }
@@ -51,6 +51,21 @@ describe MmapScanner do
51
51
  subject.pos.should == 10
52
52
  end
53
53
  end
54
+ describe '#scan_until' do
55
+ it 'returns matched data as MmapScanner' do
56
+ subject.scan(/012/)
57
+ ret = subject.scan_until(/678/)
58
+ ret.class.should == MmapScanner
59
+ ret.to_s.should == '345678'
60
+ end
61
+ it 'returns nil if not matched' do
62
+ subject.scan_until(/321/).should be_nil
63
+ end
64
+ it 'forward current position' do
65
+ subject.scan_until(/456/)
66
+ subject.pos.should == 7
67
+ end
68
+ end
54
69
  describe '#check' do
55
70
  it 'returns matched data as MmapScanner' do
56
71
  ret = subject.check(/\d{10}/)
@@ -89,6 +104,30 @@ describe MmapScanner do
89
104
  subject.pos.should == 0
90
105
  end
91
106
  end
107
+ describe '#matched' do
108
+ it 'returns matched data after scan' do
109
+ subject.scan(/\d{6}/)
110
+ subject.matched.to_s.should == '012345'
111
+ end
112
+ it 'returns matched data after scan_until' do
113
+ subject.scan_until(/4567/)
114
+ subject.matched.to_s.should == '4567'
115
+ end
116
+ it 'returns nil if there is not matched data' do
117
+ subject.matched.should be_nil
118
+ end
119
+ end
120
+ describe '#matched(nth)' do
121
+ it 'returns nth part of matched string' do
122
+ subject.scan(/(..)(..)(..)/)
123
+ subject.matched(0).to_s.should == '012345'
124
+ subject.matched(1).to_s.should == '01'
125
+ subject.matched(2).to_s.should == '23'
126
+ subject.matched(3).to_s.should == '45'
127
+ subject.matched(4).should be_nil
128
+ subject.matched(-1).should be_nil
129
+ end
130
+ end
92
131
  describe '#peek' do
93
132
  it 'returns MmapScanner' do
94
133
  subject.peek(10).should be_instance_of MmapScanner
@@ -115,6 +154,10 @@ describe MmapScanner do
115
154
  ret.should be_instance_of MmapScanner
116
155
  ret.to_s.should == '789'
117
156
  end
157
+ it 'returns empty MmapScanner if it reached to end' do
158
+ subject.pos = 10000
159
+ subject.rest.to_s.should == ''
160
+ end
118
161
  end
119
162
  describe '.new with position' do
120
163
  it '#size is length of rest data' do
@@ -152,6 +195,13 @@ describe MmapScanner do
152
195
  let(:src){'0123456789'*1020}
153
196
  subject{MmapScanner.new(src, 100, 10000)}
154
197
  it_should_behave_like 'MmapScanner'
198
+ describe '.new with empty source' do
199
+ it 'returns empty MmapScanner' do
200
+ m = MmapScanner.new('')
201
+ m.size.should == 0
202
+ m.to_s.should be_empty
203
+ end
204
+ end
155
205
  end
156
206
 
157
207
  context 'with MmapScanner' do
@@ -163,5 +213,12 @@ describe MmapScanner do
163
213
  let(:src){MmapScanner.new(@file)}
164
214
  subject{MmapScanner.new(src, 100, 10000)}
165
215
  it_should_behave_like 'MmapScanner'
216
+ describe '.new with empty source' do
217
+ it 'returns empty MmapScanner' do
218
+ m = MmapScanner.new(src, 1020, 0)
219
+ m.size.should == 0
220
+ m.to_s.should be_empty
221
+ end
222
+ end
166
223
  end
167
224
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: mmapscanner
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: "0.2"
5
+ version: "0.3"
6
6
  platform: ruby
7
7
  authors:
8
8
  - TOMITA Masahiro
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-03-02 00:00:00 +09:00
13
+ date: 2011-03-21 00:00:00 +09:00
14
14
  default_executable:
15
15
  dependencies: []
16
16