mmapscanner 0.3 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/README.md +1 -0
  2. data/ext/mmapscanner.c +72 -46
  3. metadata +2 -2
data/README.md CHANGED
@@ -53,6 +53,7 @@ Usage
53
53
  * rest はポインタ以降のデータを返します。
54
54
  * matched は正規表現に一致した部分を MmapScanner オブジェクトで返します。
55
55
  * matched(n) は正規表現の n番目の括弧に一致した部分を MmapScanner オブジェクトで返します。
56
+ * matched_str は matched と同じですが、文字列を返します。
56
57
  * pos は現在のポインタの位置を返します。
57
58
  * pos= でポインタ位置を変更することができます。
58
59
 
data/ext/mmapscanner.c CHANGED
@@ -20,6 +20,8 @@ typedef struct {
20
20
  int matched;
21
21
  size_t matched_pos;
22
22
  struct re_registers regs;
23
+ VALUE data;
24
+ VALUE dummy_str;
23
25
  } mmapscanner_t;
24
26
 
25
27
  static void mmap_free(mmap_data_t *data)
@@ -48,6 +50,12 @@ static void mmapscanner_free(mmapscanner_t *ms)
48
50
  free(ms);
49
51
  }
50
52
 
53
+ static void mark(mmapscanner_t *ms)
54
+ {
55
+ rb_gc_mark_maybe(ms->data);
56
+ rb_gc_mark_maybe(ms->dummy_str);
57
+ }
58
+
51
59
  VALUE allocate(VALUE klass)
52
60
  {
53
61
  mmapscanner_t *ms;
@@ -55,9 +63,11 @@ VALUE allocate(VALUE klass)
55
63
  ms->offset = 0;
56
64
  ms->size = 0;
57
65
  ms->pos = 0;
66
+ ms->matched = 0;
58
67
  ms->matched_pos = 0;
59
68
  onig_region_init(&ms->regs);
60
- return Data_Wrap_Struct(klass, 0, mmapscanner_free, ms);
69
+ ms->dummy_str = Qnil;
70
+ return Data_Wrap_Struct(klass, mark, mmapscanner_free, ms);
61
71
  }
62
72
 
63
73
  static VALUE initialize(int argc, VALUE *argv, VALUE obj)
@@ -78,7 +88,7 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
78
88
  Data_Get_Struct(src, mmapscanner_t, ms);
79
89
  src_offset = ms->offset;
80
90
  src_size = ms->size;
81
- src_data = rb_iv_get(src, "data");
91
+ src_data = ms->data;
82
92
  } else if (TYPE(src) == T_FILE) {
83
93
  int fd;
84
94
  struct stat st;
@@ -109,7 +119,7 @@ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
109
119
  ms->pos = 0;
110
120
  ms->matched = 0;
111
121
  ms->matched_pos = 0;
112
- rb_iv_set(obj, "data", src_data);
122
+ ms->data = src_data;
113
123
  return Qnil;
114
124
  }
115
125
 
@@ -123,16 +133,12 @@ static VALUE size(VALUE obj)
123
133
  static VALUE to_s(VALUE obj)
124
134
  {
125
135
  mmapscanner_t *ms;
126
- Data_Get_Struct(obj, mmapscanner_t, ms);
127
- size_t offset = ms->offset;
128
- size_t size = ms->size;
129
- VALUE data = rb_iv_get(obj, "data");
130
136
  mmap_data_t *mdata;
131
-
132
- if (TYPE(data) == T_STRING)
133
- return rb_str_new(RSTRING_PTR(data)+offset, size);
134
- Data_Get_Struct(data, mmap_data_t, mdata);
135
- return rb_str_new(mdata->ptr+offset, size);
137
+ Data_Get_Struct(obj, mmapscanner_t, ms);
138
+ if (TYPE(ms->data) == T_STRING)
139
+ return rb_str_new(RSTRING_PTR(ms->data) + ms->offset, ms->size);
140
+ Data_Get_Struct(ms->data, mmap_data_t, mdata);
141
+ return rb_str_new(mdata->ptr + ms->offset, ms->size);
136
142
  }
137
143
 
138
144
  static VALUE slice(VALUE obj, VALUE pos, VALUE len)
@@ -178,38 +184,36 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeon
178
184
  int result;
179
185
  size_t old_pos, matched_len;
180
186
  char *ptr;
181
- size_t pos, size;
182
- VALUE data;
183
187
  mmap_data_t *mdata;
184
188
 
189
+ ms->matched = 0;
185
190
  Check_Type(re, T_REGEXP);
186
- pos = ms->pos;
187
- size = ms->size;
188
- if (pos >= size)
191
+ if (ms->pos >= ms->size)
189
192
  return Qnil;
190
- data = rb_iv_get(obj, "data");
191
- if (TYPE(data) == T_STRING)
192
- ptr = RSTRING_PTR(data);
193
+ if (TYPE(ms->data) == T_STRING)
194
+ ptr = RSTRING_PTR(ms->data);
193
195
  else {
194
- Data_Get_Struct(data, mmap_data_t, mdata);
196
+ Data_Get_Struct(ms->data, mmap_data_t, mdata);
195
197
  ptr = mdata->ptr;
196
198
  }
197
199
  ptr += ms->offset;
198
200
 
199
- reg = rb_reg_prepare_re(re, rb_str_new("", 0));
201
+ if (ms->dummy_str == Qnil)
202
+ ms->dummy_str = rb_str_new("", 0);
203
+ reg = rb_reg_prepare_re(re, ms->dummy_str);
200
204
  tmpreg = reg != RREGEXP(re)->ptr;
201
205
  if (!tmpreg) RREGEXP(re)->usecnt++;
202
206
 
203
207
  if (headonly) {
204
- result = onig_match(reg, (UChar*)(ptr+pos),
205
- (UChar*)(ptr+size),
206
- (UChar*)(ptr+pos),
208
+ result = onig_match(reg, (UChar*)(ptr + ms->pos),
209
+ (UChar*)(ptr + ms->size),
210
+ (UChar*)(ptr + ms->pos),
207
211
  &ms->regs, ONIG_OPTION_NONE);
208
212
  } else {
209
- result = onig_search(reg, (UChar*)(ptr+pos),
210
- (UChar*)(ptr+size),
211
- (UChar*)(ptr+pos),
212
- (UChar*)(ptr+size),
213
+ result = onig_search(reg, (UChar*)(ptr + ms->pos),
214
+ (UChar*)(ptr + ms->size),
215
+ (UChar*)(ptr + ms->pos),
216
+ (UChar*)(ptr + ms->size),
213
217
  &ms->regs, ONIG_OPTION_NONE);
214
218
  }
215
219
  if (!tmpreg) RREGEXP(re)->usecnt--;
@@ -223,12 +227,10 @@ static VALUE scan_sub(VALUE obj, VALUE re, int forward, int headonly, int sizeon
223
227
  }
224
228
  if (result < 0)
225
229
  return Qnil;
226
- old_pos = pos;
230
+ old_pos = ms->pos;
227
231
  matched_len = ms->regs.end[0];
228
- if (forward) {
229
- pos += matched_len;
230
- ms->pos = pos;
231
- }
232
+ if (forward)
233
+ ms->pos += matched_len;
232
234
  ms->matched = 1;
233
235
  ms->matched_pos = old_pos;
234
236
 
@@ -286,27 +288,50 @@ static VALUE rest(VALUE obj)
286
288
  return rb_funcall(cMmapScanner, rb_intern("new"), 2, obj, SIZET2NUM(ms->pos));
287
289
  }
288
290
 
289
- static VALUE matched(int argc, VALUE *argv, VALUE obj)
291
+ static int matched_sub(int argc, VALUE *argv, mmapscanner_t *ms, size_t *pos, size_t *len)
290
292
  {
291
- mmapscanner_t *ms;
292
- Data_Get_Struct(obj, mmapscanner_t, ms);
293
- VALUE nth;
294
293
  int i = 0;
295
- size_t pos, len;
296
-
297
- if (rb_scan_args(argc, argv, "01", &nth) == 1)
298
- i = NUM2LONG(nth);
299
294
  if (ms->matched == 0)
300
- return Qnil;
295
+ return 0;
296
+ if (argc == 0)
297
+ i = 0;
298
+ else if (argc == 1)
299
+ i = NUM2LONG(argv[0]);
300
+ else
301
+ rb_raise(rb_eArgError, "wrong number of arguments (%d for 0..1)", argc);
301
302
  if (i < 0)
302
- return Qnil;
303
+ return 0;
303
304
  if (i >= ms->regs.num_regs)
305
+ return 0;
306
+ *pos = ms->matched_pos + ms->regs.beg[i];
307
+ *len = ms->regs.end[i] - ms->regs.beg[i];
308
+ return 1;
309
+ }
310
+
311
+ static VALUE matched(int argc, VALUE *argv, VALUE obj)
312
+ {
313
+ mmapscanner_t *ms;
314
+ Data_Get_Struct(obj, mmapscanner_t, ms);
315
+ size_t pos, len;
316
+ if (matched_sub(argc, argv, ms, &pos, &len) == 0)
304
317
  return Qnil;
305
- pos = ms->matched_pos + ms->regs.beg[i];
306
- len = ms->regs.end[i] - ms->regs.beg[i];
307
318
  return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(pos), SIZET2NUM(len));
308
319
  }
309
320
 
321
+ static VALUE matched_str(int argc, VALUE *argv, VALUE obj)
322
+ {
323
+ mmapscanner_t *ms;
324
+ Data_Get_Struct(obj, mmapscanner_t, ms);
325
+ mmap_data_t *mdata;
326
+ size_t pos, len;
327
+ if (matched_sub(argc, argv, ms, &pos, &len) == 0)
328
+ return Qnil;
329
+ if (TYPE(ms->data) == T_STRING)
330
+ return rb_str_new(RSTRING_PTR(ms->data)+ms->offset+pos, len);
331
+ Data_Get_Struct(ms->data, mmap_data_t, mdata);
332
+ return rb_str_new(mdata->ptr+ms->offset+pos, len);
333
+ }
334
+
310
335
  void Init_mmapscanner(void)
311
336
  {
312
337
  cMmapScanner = rb_define_class("MmapScanner", rb_cObject);
@@ -329,6 +354,7 @@ void Init_mmapscanner(void)
329
354
  rb_define_method(cMmapScanner, "eos?", eos_p, 0);
330
355
  rb_define_method(cMmapScanner, "rest", rest, 0);
331
356
  rb_define_method(cMmapScanner, "matched", matched, -1);
357
+ rb_define_method(cMmapScanner, "matched_str", matched_str, -1);
332
358
 
333
359
  cMmap = rb_define_class_under(cMmapScanner, "Mmap", rb_cObject);
334
360
  }
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: mmapscanner
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: "0.3"
5
+ version: 0.3.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - TOMITA Masahiro
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-03-21 00:00:00 +09:00
13
+ date: 2011-03-23 00:00:00 +09:00
14
14
  default_executable:
15
15
  dependencies: []
16
16