mmapscanner 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -43,13 +43,18 @@ Usage
43
43
  * size, length は mmap(2) したサイズを返します。
44
44
  * to_s は mmap(2) した領域を String で返します。Encoding は常に ASCII-8BIT です。
45
45
  * slice は mmap(2) した領域の一部を新たな MmapScanner オブジェクトで返します。
46
- * scan は正規表現に一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
46
+ * scan はポインタ位置で正規表現との一致を試みます。一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
47
+ * scan_until は scan と同じですが、現在のポインタの位置以降で一致を試みます。
47
48
  * check は scan と同じですが、ポインタを進めません。
48
49
  * skip は scan と同じですが、一致したバイト数を返します。
49
50
  * match? は check と同じですが、一致したバイト数を返します。
50
51
  * peek は指定したバイト数分のデータを返します。ポインタは進みません。
51
52
  * eos? はポインタが末尾に達していると true を返します。
52
53
  * rest はポインタ以降のデータを返します。
54
+ * matched は正規表現に一致した部分を MmapScanner オブジェクトで返します。
55
+ * matched(n) は正規表現の n番目の括弧に一致した部分を MmapScanner オブジェクトで返します。
56
+ * pos は現在のポインタの位置を返します。
57
+ * pos= でポインタ位置を変更することができます。
53
58
 
54
59
  Copyright
55
60
  ---------
data/ext/mmapscanner.c CHANGED
@@ -13,6 +13,15 @@ typedef struct {
13
13
  size_t size;
14
14
  } mmap_data_t;
15
15
 
16
+ typedef struct {
17
+ size_t offset;
18
+ size_t size;
19
+ size_t pos;
20
+ int matched;
21
+ size_t matched_pos;
22
+ struct re_registers regs;
23
+ } mmapscanner_t;
24
+
16
25
  static void mmap_free(mmap_data_t *data)
17
26
  {
18
27
  if (data->ptr)
@@ -33,12 +42,31 @@ static VALUE create_mmap_object(int fd, size_t offset, size_t size)
33
42
  return Data_Wrap_Struct(cMmap, 0, mmap_free, data);
34
43
  }
35
44
 
45
+ static void mmapscanner_free(mmapscanner_t *ms)
46
+ {
47
+ onig_region_free(&ms->regs, 0);
48
+ free(ms);
49
+ }
50
+
51
+ VALUE allocate(VALUE klass)
52
+ {
53
+ mmapscanner_t *ms;
54
+ ms = malloc(sizeof *ms);
55
+ ms->offset = 0;
56
+ ms->size = 0;
57
+ ms->pos = 0;
58
+ ms->matched_pos = 0;
59
+ onig_region_init(&ms->regs);
60
+ return Data_Wrap_Struct(klass, 0, mmapscanner_free, ms);
61
+ }
62
+
36
63
  static VALUE initialize(int argc, VALUE *argv, VALUE obj)
37
64
  {
38
65
  VALUE src, voffset, vsize;
39
66
  size_t offset, size;
40
67
  size_t src_offset, src_size;
41
68
  VALUE src_data;
69
+ mmapscanner_t *ms;
42
70
 
43
71
  rb_scan_args(argc, argv, "12", &src, &voffset, &vsize);
44
72
  if (voffset != Qnil && NUM2LL(voffset) < 0)
@@ -47,8 +75,9 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
47
75
  rb_raise(rb_eRangeError, "length out of range: %lld", NUM2LL(vsize));
48
76
  offset = voffset == Qnil ? 0 : NUM2SIZET(voffset);
49
77
  if (rb_obj_class(src) == cMmapScanner) {
50
- src_offset = NUM2SIZET(rb_iv_get(src, "offset"));
51
- src_size = NUM2SIZET(rb_iv_get(src, "size"));
78
+ Data_Get_Struct(src, mmapscanner_t, ms);
79
+ src_offset = ms->offset;
80
+ src_size = ms->size;
52
81
  src_data = rb_iv_get(src, "data");
53
82
  } else if (TYPE(src) == T_FILE) {
54
83
  int fd;
@@ -68,27 +97,35 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
68
97
  } else {
69
98
  rb_raise(rb_eTypeError, "wrong argument type %s (expected File/String/MmapScanner)", rb_obj_classname(src));
70
99
  }
71
- if (offset >= src_size)
72
- rb_raise(rb_eRangeError, "length out of range: %zu >= %zu", offset, src_size);
100
+ if (offset > src_size)
101
+ rb_raise(rb_eRangeError, "length out of range: %zu > %zu", offset, src_size);
73
102
  size = vsize == Qnil ? src_size - offset : NUM2SIZET(vsize);
74
103
  if (size > src_size - offset)
75
104
  size = src_size - offset;
76
- rb_iv_set(obj, "offset", SIZET2NUM(src_offset + offset));
77
- rb_iv_set(obj, "size", SIZET2NUM(size));
105
+
106
+ Data_Get_Struct(obj, mmapscanner_t, ms);
107
+ ms->offset = src_offset + offset;
108
+ ms->size = size;
109
+ ms->pos = 0;
110
+ ms->matched = 0;
111
+ ms->matched_pos = 0;
78
112
  rb_iv_set(obj, "data", src_data);
79
- rb_iv_set(obj, "pos", INT2NUM(0));
80
113
  return Qnil;
81
114
  }
82
115
 
83
116
  static VALUE size(VALUE obj)
84
117
  {
85
- return rb_iv_get(obj, "size");
118
+ mmapscanner_t *ms;
119
+ Data_Get_Struct(obj, mmapscanner_t, ms);
120
+ return SIZET2NUM(ms->size);
86
121
  }
87
122
 
88
123
  static VALUE to_s(VALUE obj)
89
124
  {
90
- size_t offset = NUM2SIZET(rb_iv_get(obj, "offset"));
91
- size_t size = NUM2SIZET(rb_iv_get(obj, "size"));
125
+ mmapscanner_t *ms;
126
+ Data_Get_Struct(obj, mmapscanner_t, ms);
127
+ size_t offset = ms->offset;
128
+ size_t size = ms->size;
92
129
  VALUE data = rb_iv_get(obj, "data");
93
130
  mmap_data_t *mdata;
94
131
 
@@ -110,30 +147,35 @@ static VALUE inspect(VALUE obj)
110
147
 
111
148
  static VALUE pos(VALUE obj)
112
149
  {
113
- return rb_iv_get(obj, "pos");
150
+ mmapscanner_t *ms;
151
+ Data_Get_Struct(obj, mmapscanner_t, ms);
152
+ return SIZET2NUM(ms->pos);
114
153
  }
115
154
 
116
155
  static VALUE set_pos(VALUE obj, VALUE pos)
117
156
  {
157
+ mmapscanner_t *ms;
158
+ Data_Get_Struct(obj, mmapscanner_t, ms);
118
159
  size_t p, size;
119
160
 
120
161
  if (NUM2LL(pos) < 0)
121
162
  rb_raise(rb_eRangeError, "out of range: %lld", NUM2LL(pos));
122
163
  p = NUM2SIZET(pos);
123
- size = NUM2SIZET(rb_iv_get(obj, "size"));
164
+ size = ms->size;
124
165
  if (p > size)
125
166
  rb_raise(rb_eRangeError, "out of range: %zu > %zu", p, size);
126
- rb_iv_set(obj, "pos", pos);
167
+ ms->pos = p;
127
168
  return pos;
128
169
  }
129
170
 
130
- static VALUE scan_sub(VALUE obj, VALUE re, int forward)
171
+ static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeonly)
131
172
  {
173
+ mmapscanner_t *ms;
174
+ Data_Get_Struct(obj, mmapscanner_t, ms);
132
175
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
133
176
  regex_t *reg;
134
177
  int tmpreg;
135
178
  int result;
136
- struct re_registers regs;
137
179
  size_t old_pos, matched_len;
138
180
  char *ptr;
139
181
  size_t pos, size;
@@ -141,8 +183,8 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
141
183
  mmap_data_t *mdata;
142
184
 
143
185
  Check_Type(re, T_REGEXP);
144
- pos = NUM2SIZET(rb_iv_get(obj, "pos"));
145
- size = NUM2SIZET(rb_iv_get(obj, "size"));
186
+ pos = ms->pos;
187
+ size = ms->size;
146
188
  if (pos >= size)
147
189
  return Qnil;
148
190
  data = rb_iv_get(obj, "data");
@@ -152,17 +194,24 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
152
194
  Data_Get_Struct(data, mmap_data_t, mdata);
153
195
  ptr = mdata->ptr;
154
196
  }
155
- ptr += NUM2SIZET(rb_iv_get(obj, "offset"));
197
+ ptr += ms->offset;
156
198
 
157
199
  reg = rb_reg_prepare_re(re, rb_str_new("", 0));
158
200
  tmpreg = reg != RREGEXP(re)->ptr;
159
201
  if (!tmpreg) RREGEXP(re)->usecnt++;
160
202
 
161
- onig_region_init(&regs);
162
- result = onig_match(reg, (UChar* )(ptr+pos),
163
- (UChar* )(ptr+size),
164
- (UChar* )(ptr+pos),
165
- &regs, ONIG_OPTION_NONE);
203
+ if (headonly) {
204
+ result = onig_match(reg, (UChar*)(ptr+pos),
205
+ (UChar*)(ptr+size),
206
+ (UChar*)(ptr+pos),
207
+ &ms->regs, ONIG_OPTION_NONE);
208
+ } else {
209
+ result = onig_search(reg, (UChar*)(ptr+pos),
210
+ (UChar*)(ptr+size),
211
+ (UChar*)(ptr+pos),
212
+ (UChar*)(ptr+size),
213
+ &ms->regs, ONIG_OPTION_NONE);
214
+ }
166
215
  if (!tmpreg) RREGEXP(re)->usecnt--;
167
216
  if (tmpreg) {
168
217
  if (RREGEXP(re)->usecnt) {
@@ -175,65 +224,93 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
175
224
  if (result < 0)
176
225
  return Qnil;
177
226
  old_pos = pos;
178
- matched_len = regs.end[0];
227
+ matched_len = ms->regs.end[0];
179
228
  if (forward) {
180
229
  pos += matched_len;
181
- rb_iv_set(obj, "pos", SIZET2NUM(pos));
230
+ ms->pos = pos;
182
231
  }
183
- return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, ULL2NUM(old_pos), ULL2NUM(matched_len));
232
+ ms->matched = 1;
233
+ ms->matched_pos = old_pos;
234
+
235
+ if (sizeonly)
236
+ return SIZET2NUM(matched_len);
237
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(old_pos), SIZET2NUM(matched_len));
184
238
  }
185
239
 
186
240
  static VALUE scan(VALUE obj, VALUE re)
187
241
  {
188
- return scan_sub(obj, re, 1);
242
+ return scan_sub(obj, re, 1, 1, 0);
243
+ }
244
+
245
+ static VALUE scan_until(VALUE obj, VALUE re)
246
+ {
247
+ return scan_sub(obj, re, 1, 0, 0);
189
248
  }
190
249
 
191
250
  static VALUE check(VALUE obj, VALUE re)
192
251
  {
193
- return scan_sub(obj, re, 0);
252
+ return scan_sub(obj, re, 0, 1, 0);
194
253
  }
195
254
 
196
255
  static VALUE skip(VALUE obj, VALUE re)
197
256
  {
198
- VALUE ret = scan_sub(obj, re, 1);
199
- if (ret == Qnil)
200
- return ret;
201
- return rb_iv_get(ret, "size");
257
+ return scan_sub(obj, re, 1, 1, 1);
202
258
  }
203
259
 
204
260
  static VALUE match_p(VALUE obj, VALUE re)
205
261
  {
206
- VALUE ret = scan_sub(obj, re, 0);
207
- if (ret == Qnil)
208
- return ret;
209
- return rb_iv_get(ret, "size");
262
+ return scan_sub(obj, re, 0, 1, 1);
210
263
  }
211
264
 
212
265
  static VALUE peek(VALUE obj, VALUE size)
213
266
  {
267
+ mmapscanner_t *ms;
268
+ Data_Get_Struct(obj, mmapscanner_t, ms);
214
269
  size_t sz = NUM2SIZET(size);
215
- size_t data_pos = NUM2SIZET(rb_iv_get(obj, "pos"));
216
- size_t data_size = NUM2SIZET(rb_iv_get(obj, "size"));
217
- if (sz > data_size - data_pos)
218
- sz = data_size - data_pos;
219
- return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(data_pos), SIZET2NUM(sz));
270
+ if (sz > ms->size - ms->pos)
271
+ sz = ms->size - ms->pos;
272
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(ms->pos), SIZET2NUM(sz));
220
273
  }
221
274
 
222
275
  static VALUE eos_p(VALUE obj)
223
276
  {
224
- size_t data_pos = NUM2SIZET(rb_iv_get(obj, "pos"));
225
- size_t data_size = NUM2SIZET(rb_iv_get(obj, "size"));
226
- return data_pos >= data_size ? Qtrue : Qfalse;
277
+ mmapscanner_t *ms;
278
+ Data_Get_Struct(obj, mmapscanner_t, ms);
279
+ return ms->pos >= ms->size ? Qtrue : Qfalse;
227
280
  }
228
281
 
229
282
  static VALUE rest(VALUE obj)
230
283
  {
231
- return rb_funcall(cMmapScanner, rb_intern("new"), 2, obj, rb_iv_get(obj, "pos"));
284
+ mmapscanner_t *ms;
285
+ Data_Get_Struct(obj, mmapscanner_t, ms);
286
+ return rb_funcall(cMmapScanner, rb_intern("new"), 2, obj, SIZET2NUM(ms->pos));
287
+ }
288
+
289
+ static VALUE matched(int argc, VALUE *argv, VALUE obj)
290
+ {
291
+ mmapscanner_t *ms;
292
+ Data_Get_Struct(obj, mmapscanner_t, ms);
293
+ VALUE nth;
294
+ int i = 0;
295
+ size_t pos, len;
296
+
297
+ if (rb_scan_args(argc, argv, "01", &nth) == 1)
298
+ i = NUM2LONG(nth);
299
+ if (ms->matched == 0)
300
+ return Qnil;
301
+ if (i < 0)
302
+ return Qnil;
303
+ if (i >= ms->regs.num_regs)
304
+ return Qnil;
305
+ pos = ms->matched_pos + ms->regs.beg[i];
306
+ len = ms->regs.end[i] - ms->regs.beg[i];
307
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(pos), SIZET2NUM(len));
232
308
  }
233
309
 
234
310
  void Init_mmapscanner(void)
235
311
  {
236
312
  cMmapScanner = rb_define_class("MmapScanner", rb_cObject);
313
+ rb_define_alloc_func(cMmapScanner, allocate);
237
314
  rb_define_method(cMmapScanner, "initialize", initialize, -1);
238
315
  rb_define_method(cMmapScanner, "size", size, 0);
239
316
  rb_define_method(cMmapScanner, "length", size, 0);
@@ -244,12 +321,14 @@ void Init_mmapscanner(void)
244
321
  rb_define_method(cMmapScanner, "pos", pos, 0);
245
322
  rb_define_method(cMmapScanner, "pos=", set_pos, 1);
246
323
  rb_define_method(cMmapScanner, "scan", scan, 1);
324
+ rb_define_method(cMmapScanner, "scan_until", scan_until, 1);
247
325
  rb_define_method(cMmapScanner, "check", check, 1);
248
326
  rb_define_method(cMmapScanner, "skip", skip, 1);
249
327
  rb_define_method(cMmapScanner, "match?", match_p, 1);
250
328
  rb_define_method(cMmapScanner, "peek", peek, 1);
251
329
  rb_define_method(cMmapScanner, "eos?", eos_p, 0);
252
330
  rb_define_method(cMmapScanner, "rest", rest, 0);
331
+ rb_define_method(cMmapScanner, "matched", matched, -1);
253
332
 
254
333
  cMmap = rb_define_class_under(cMmapScanner, "Mmap", rb_cObject);
255
334
  }
@@ -51,6 +51,21 @@ describe MmapScanner do
51
51
  subject.pos.should == 10
52
52
  end
53
53
  end
54
+ describe '#scan_until' do
55
+ it 'returns matched data as MmapScanner' do
56
+ subject.scan(/012/)
57
+ ret = subject.scan_until(/678/)
58
+ ret.class.should == MmapScanner
59
+ ret.to_s.should == '345678'
60
+ end
61
+ it 'returns nil if not matched' do
62
+ subject.scan_until(/321/).should be_nil
63
+ end
64
+ it 'forward current position' do
65
+ subject.scan_until(/456/)
66
+ subject.pos.should == 7
67
+ end
68
+ end
54
69
  describe '#check' do
55
70
  it 'returns matched data as MmapScanner' do
56
71
  ret = subject.check(/\d{10}/)
@@ -89,6 +104,30 @@ describe MmapScanner do
89
104
  subject.pos.should == 0
90
105
  end
91
106
  end
107
+ describe '#matched' do
108
+ it 'returns matched data after scan' do
109
+ subject.scan(/\d{6}/)
110
+ subject.matched.to_s.should == '012345'
111
+ end
112
+ it 'returns matched data after scan_until' do
113
+ subject.scan_until(/4567/)
114
+ subject.matched.to_s.should == '4567'
115
+ end
116
+ it 'returns nil if there is not matched data' do
117
+ subject.matched.should be_nil
118
+ end
119
+ end
120
+ describe '#matched(nth)' do
121
+ it 'returns nth part of matched string' do
122
+ subject.scan(/(..)(..)(..)/)
123
+ subject.matched(0).to_s.should == '012345'
124
+ subject.matched(1).to_s.should == '01'
125
+ subject.matched(2).to_s.should == '23'
126
+ subject.matched(3).to_s.should == '45'
127
+ subject.matched(4).should be_nil
128
+ subject.matched(-1).should be_nil
129
+ end
130
+ end
92
131
  describe '#peek' do
93
132
  it 'returns MmapScanner' do
94
133
  subject.peek(10).should be_instance_of MmapScanner
@@ -115,6 +154,10 @@ describe MmapScanner do
115
154
  ret.should be_instance_of MmapScanner
116
155
  ret.to_s.should == '789'
117
156
  end
157
+ it 'returns empty MmapScanner if it reached to end' do
158
+ subject.pos = 10000
159
+ subject.rest.to_s.should == ''
160
+ end
118
161
  end
119
162
  describe '.new with position' do
120
163
  it '#size is length of rest data' do
@@ -152,6 +195,13 @@ describe MmapScanner do
152
195
  let(:src){'0123456789'*1020}
153
196
  subject{MmapScanner.new(src, 100, 10000)}
154
197
  it_should_behave_like 'MmapScanner'
198
+ describe '.new with empty source' do
199
+ it 'returns empty MmapScanner' do
200
+ m = MmapScanner.new('')
201
+ m.size.should == 0
202
+ m.to_s.should be_empty
203
+ end
204
+ end
155
205
  end
156
206
 
157
207
  context 'with MmapScanner' do
@@ -163,5 +213,12 @@ describe MmapScanner do
163
213
  let(:src){MmapScanner.new(@file)}
164
214
  subject{MmapScanner.new(src, 100, 10000)}
165
215
  it_should_behave_like 'MmapScanner'
216
+ describe '.new with empty source' do
217
+ it 'returns empty MmapScanner' do
218
+ m = MmapScanner.new(src, 1020, 0)
219
+ m.size.should == 0
220
+ m.to_s.should be_empty
221
+ end
222
+ end
166
223
  end
167
224
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: mmapscanner
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: "0.2"
5
+ version: "0.3"
6
6
  platform: ruby
7
7
  authors:
8
8
  - TOMITA Masahiro
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-03-02 00:00:00 +09:00
13
+ date: 2011-03-21 00:00:00 +09:00
14
14
  default_executable:
15
15
  dependencies: []
16
16