mmapscanner 0.1a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,60 @@
1
+ MmapScanner
2
+ ===========
3
+
4
+ Description
5
+ -----------
6
+
7
+ 文字列の代わりにファイルを mmap(2) した領域に対して StringScanner のようなことをするものです。
8
+
9
+ Installation
10
+ ------------
11
+
12
+ $ cd ext
13
+ $ ruby ./extconf.rb
14
+ $ make
15
+ $ sudo make install
16
+
17
+ Gem Installation
18
+ ----------------
19
+
20
+ $ gem install mmapscanner
21
+
22
+ Features
23
+ --------
24
+
25
+ * ファイルを mmap(2) した領域から正規表現に適合した部分データを返します。
26
+ * 返されるデータも MmapScanner オブジェクトで、最初にファイルから mmap(2) した領域を共有しています。
27
+ * mmap(2) を使用しているので大量データでもメモリを消費しません。to_s することではじめて String オブジェクトを生成します。
28
+
29
+ Usage
30
+ -----
31
+
32
+ * MmapScanner.new でファイルを mmap(2) します。mmap(2) できないファイルやパラメータを渡すとエラーになります。
33
+
34
+ # ファイル全体を mmap
35
+ ms = MmapScanner.new(File.open("filename"))
36
+ # ファイルの先頭 4096 バイト以降を mmap
37
+ ms = MmapScanner.new(File.open("filename"), 4096)
38
+ # ファイルの先頭 4096 バイト以降の 1234 バイト分を mmap
39
+ ms = MmapScanner.new(File.open("filename"), 4096, 1234)
40
+
41
+ * size, length は mmap(2) したサイズを返します。
42
+ * to_s は mmap(2) した領域を String で返します。Encoding は常に ASCII-8BIT です。
43
+ * slice は mmap(2) した領域の一部を新たな MmapScanner オブジェクトで返します。
44
+ * scan は正規表現に一致した部分を返し、ポインタを進めます。一致しない場合は nil を返します。
45
+ * check は scan と同じですが、ポインタを進めません。
46
+ * skip は scan と同じですが、一致したバイト数を返します。
47
+ * match? は check と同じですが、一致したバイト数を返します。
48
+ * peek は指定したバイト数分のデータを返します。ポインタは進みません。
49
+ * eos? はポインタが末尾に達していると true を返します。
50
+ * rest はポインタ以降のデータを返します。
51
+
52
+ Copyright
53
+ ---------
54
+
55
+ <dl>
56
+ <dt>Author<dd>TOMITA Masahiro &lt;tommy@tmtm.org&gt;
57
+ <dt>Copyright<dd>Copyright (c) 2011 TOMITA Masahiro
58
+ <dt>License<dd>Ruby's
59
+ </dl>
60
+
data/ext/extconf.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'mkmf' # mkmf provides create_makefile
2
+ create_makefile 'mmapscanner' # writes the Makefile for the 'mmapscanner' extension (entry point: Init_mmapscanner in mmapscanner.c)
data/ext/mmapscanner.c ADDED
@@ -0,0 +1,254 @@
1
+ #include <sys/types.h>
2
+ #include <sys/stat.h>
3
+ #include <unistd.h>
4
+ #include <sys/mman.h>
5
+ #include <ruby.h>
6
+ #include <ruby/io.h>
7
+
8
+ static VALUE cMmapScanner;
9
+ /* Per-object state: one byte region plus the scan pointer into it. */
10
+ typedef struct {
11
+ char *ptr; /* start of the region; allocate() leaves it NULL until initialize runs */
12
+ size_t size; /* length of the region in bytes */
13
+ size_t pos; /* scan pointer as a byte offset from ptr; pos= rejects values above size */
14
+ } mmap_data_t;
15
+ /* GC free callback: unmaps the region (if one was ever mapped) and releases the struct. */
16
+ static void mmap_free(mmap_data_t *data)
17
+ {
18
+ if (data->ptr) /* still NULL when the object was allocated but never initialized */
19
+ munmap(data->ptr, data->size);
20
+ xfree(data); /* the struct comes from xmalloc() in allocate(), so it must go back through xfree(), not free() */
21
+ }
22
+ /* Allocator: wraps an empty mmap_data_t (no region, scan pointer 0) in a new object of klass. */
23
+ static VALUE allocate(VALUE klass)
24
+ {
25
+ mmap_data_t *fresh = ALLOC(mmap_data_t); /* ALLOC(type) is ruby.h shorthand for xmalloc(sizeof(type)) with a cast */
26
+ VALUE wrapped;
27
+
28
+ fresh->pos = 0;
29
+ fresh->size = 0;
30
+ fresh->ptr = NULL; /* NULL is what initialize and mmap_free test for "nothing mapped yet" */
31
+ wrapped = Data_Wrap_Struct(klass, 0, mmap_free, fresh);
32
+ /* "parent" starts out nil; initialize stores the root scanner in it when the object is a slice */
33
+ rb_iv_set(wrapped, "parent", Qnil);
34
+ return wrapped;
35
+ }
36
+ /* MmapScanner.new(src[, pos[, size]]): src is a File (mmap(2)-ed read-only from byte pos) or a MmapScanner (sliced without copying); size is clamped to the bytes available. Raises RuntimeError when re-initialized, RangeError for a negative or too-large pos / negative size, SystemCallError when fstat(2) or mmap(2) fails. */
37
+ static VALUE initialize(int argc, VALUE *argv, VALUE obj)
38
+ {
39
+ VALUE src, size, pos, root;
40
+ rb_io_t *fptr;
41
+ void *ptr;
42
+ mmap_data_t *data, *parent;
43
+ struct stat st;
44
+ size_t sz, offset;
45
+ Data_Get_Struct(obj, mmap_data_t, data);
46
+ if (data->ptr)
47
+ rb_raise(rb_eRuntimeError, "already initialized");
48
+ rb_scan_args(argc, argv, "12", &src, &pos, &size);
49
+ if (pos != Qnil && NUM2LL(pos) < 0)
50
+ rb_raise(rb_eRangeError, "position out of range: %lld", NUM2LL(pos));
51
+ if (size != Qnil && NUM2LL(size) < 0)
52
+ rb_raise(rb_eRangeError, "length out of range: %lld", NUM2LL(size));
53
+ offset = pos == Qnil ? 0 : NUM2SIZET(pos);
54
+ if (rb_obj_class(src) == cMmapScanner) {
55
+ Data_Get_Struct(src, mmap_data_t, parent);
56
+ if (offset > parent->size) /* offset == size is legal and yields an empty slice, so rest/peek work at end of data */
57
+ rb_raise(rb_eRangeError, "position out of range: %zu > %zu", offset, parent->size);
58
+ sz = parent->size - offset;
59
+ if (size != Qnil && NUM2SIZET(size) < sz)
60
+ sz = NUM2SIZET(size);
61
+ RDATA(obj)->dfree = (RUBY_DATA_FUNC)xfree; /* a slice only borrows the root's mapping: on GC free the struct, never munmap() part of a live mapping */
62
+ data->ptr = parent->ptr + offset;
63
+ data->size = sz;
64
+ root = rb_iv_get(src, "parent"); /* every slice already stores its root, so one lookup reaches it */
65
+ rb_iv_set(obj, "parent", root == Qnil ? src : root); /* the reference keeps the mapping's owner alive while this slice lives */
66
+ return Qnil;
67
+ }
68
+ Check_Type(src, T_FILE);
69
+ GetOpenFile(src, fptr); /* raises IOError for a closed or uninitialized File instead of dereferencing a bad fptr */
70
+ if (fstat(fptr->fd, &st) < 0)
71
+ rb_sys_fail("fstat");
72
+ if (offset > (size_t)st.st_size) /* otherwise the unsigned subtraction below would wrap to a huge length */
73
+ rb_raise(rb_eRangeError, "position out of range: %zu > %zu", offset, (size_t)st.st_size);
74
+ sz = (size_t)st.st_size - offset;
75
+ if (size != Qnil && NUM2SIZET(size) < sz)
76
+ sz = NUM2SIZET(size);
77
+ if ((ptr = mmap(NULL, sz, PROT_READ, MAP_SHARED, fptr->fd, offset)) == MAP_FAILED) /* an offset mmap(2) rejects surfaces as the matching Errno class */
78
+ rb_exc_raise(rb_funcall(rb_eSystemCallError, rb_intern("new"), 1, INT2FIX(errno)));
79
+ data->ptr = ptr;
80
+ data->size = sz;
81
+ return Qnil; /* a VALUE function must return something; the result of initialize is ignored by new */
82
+ }
83
+ /* MmapScanner#size (also registered as #length): number of bytes in this scanner's region. */
84
+ static VALUE size(VALUE obj)
85
+ {
86
+ mmap_data_t *region;
87
+ /* fetch the C struct wrapped by allocate() */
88
+ Data_Get_Struct(obj, mmap_data_t, region);
89
+ return SIZET2NUM(region->size);
90
+ }
91
+ /* MmapScanner#to_s: copies the whole region into a new String (the README documents its encoding as always ASCII-8BIT). */
92
+ static VALUE to_s(VALUE obj)
93
+ {
94
+ mmap_data_t *region;
95
+ Data_Get_Struct(obj, mmap_data_t, region);
96
+ /* this is the only place the bytes are copied out of the region */
97
+ return rb_str_new(region->ptr, region->size);
98
+ }
99
+ /* MmapScanner#slice(pos, len): a new MmapScanner over at most len bytes starting at byte pos of the receiver. */
100
+ static VALUE slice(VALUE obj, VALUE pos, VALUE len)
101
+ {
102
+ /* Nothing is read from the receiver's struct here, so no locals are needed: */
103
+ /* initialize() validates pos and len (RangeError for negative or out-of-range */
104
+ /* values), clamps len to the bytes available, and makes the new object share */
105
+ /* the receiver's bytes instead of copying them. */
106
+
107
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, pos, len);
108
+ }
109
+ /* MmapScanner#inspect: always the fixed string "#<MmapScanner>"; the region's contents are not included. */
110
+ static VALUE inspect(VALUE obj)
111
+ {
112
+ return rb_str_new2("#<MmapScanner>"); /* must be returned explicitly: falling off the end of a VALUE function yields garbage */
113
+ }
114
+ /* MmapScanner#pos: current scan pointer as a byte offset from the start of the region. */
115
+ static VALUE pos(VALUE obj)
116
+ {
117
+ mmap_data_t *region;
118
+ /* fetch the C struct wrapped by allocate() */
119
+ Data_Get_Struct(obj, mmap_data_t, region);
120
+ return SIZET2NUM(region->pos);
121
+ }
122
+ /* MmapScanner#pos=(pos): moves the scan pointer; RangeError when pos is negative or greater than size (pos == size, i.e. end of data, is allowed). Returns pos. */
123
+ static VALUE set_pos(VALUE obj, VALUE pos)
124
+ {
125
+ long long requested = NUM2LL(pos); /* signed view, only used to detect and report negative input */
126
+ mmap_data_t *region;
127
+ size_t target;
128
+ if (requested < 0)
129
+ rb_raise(rb_eRangeError, "out of range: %lld", requested);
130
+ Data_Get_Struct(obj, mmap_data_t, region);
131
+ target = NUM2SIZET(pos);
132
+ if (region->size < target)
133
+ rb_raise(rb_eRangeError, "out of range: %zu > %zu", target, region->size);
134
+ region->pos = target;
135
+ return pos;
136
+ }
137
+ /* Shared engine for scan/check/skip/match?: tries re exactly at the scan pointer. Returns the matched bytes as a new MmapScanner, or nil on mismatch or when the pointer is already at the end; the pointer advances past the match only when forward is non-zero. */
138
+ static VALUE scan_sub(VALUE obj, VALUE re, int forward)
139
+ {
140
+ regex_t *rb_reg_prepare_re(VALUE re, VALUE str); /* local prototype; NOTE(review): presumably not exported by the included headers - confirm */
141
+ mmap_data_t *data;
142
+ regex_t *reg;
143
+ int tmpreg;
144
+ int result;
145
+ struct re_registers regs;
146
+ size_t old_pos, matched_len;
147
+
148
+ Check_Type(re, T_REGEXP);
149
+ Data_Get_Struct(obj, mmap_data_t, data);
150
+ if (data->pos >= data->size)
151
+ return Qnil;
152
+ /* an empty String stands in for the subject; reg is either the Regexp's own regex_t or a temporary one */
153
+ reg = rb_reg_prepare_re(re, rb_str_new("", 0));
154
+ tmpreg = reg != RREGEXP(re)->ptr;
155
+ if (!tmpreg) RREGEXP(re)->usecnt++;
156
+
157
+ onig_region_init(&regs);
158
+ result = onig_match(reg, (UChar* )(data->ptr+data->pos),
159
+ (UChar* )(data->ptr+data->size),
160
+ (UChar* )(data->ptr+data->pos),
161
+ &regs, ONIG_OPTION_NONE);
162
+ if (!tmpreg) RREGEXP(re)->usecnt--;
163
+ if (tmpreg) {
164
+ if (RREGEXP(re)->usecnt) {
165
+ onig_free(reg);
166
+ } else {
167
+ onig_free(RREGEXP(re)->ptr);
168
+ RREGEXP(re)->ptr = reg;
169
+ }
170
+ }
171
+ matched_len = result < 0 ? 0 : (size_t)regs.end[0]; /* subject starts at the scan pointer, so end[0] is the match length */
172
+ onig_region_free(&regs, 0); /* release the arrays onig_match allocated (0: regs itself is on the stack); without this every call leaks */
173
+ if (result < 0)
174
+ return Qnil;
175
+ old_pos = data->pos;
176
+ if (forward) data->pos += matched_len;
177
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, ULL2NUM(old_pos), ULL2NUM(matched_len));
178
+ }
179
+ /* MmapScanner#scan(re): returns the match at the scan pointer as a MmapScanner (nil if none) and moves the pointer past it. */
180
+ static VALUE scan(VALUE obj, VALUE re)
181
+ {
182
+ return scan_sub(obj, re, 1); /* forward = 1: advance the scan pointer */
183
+ }
184
+ /* MmapScanner#check(re): like scan, but the scan pointer stays where it is. */
185
+ static VALUE check(VALUE obj, VALUE re)
186
+ {
187
+ return scan_sub(obj, re, 0); /* forward = 0: leave the scan pointer untouched */
188
+ }
189
+ /* MmapScanner#skip(re): like scan (the pointer advances), but returns the number of matched bytes, or nil if there is no match. */
190
+ static VALUE skip(VALUE obj, VALUE re)
191
+ {
192
+ VALUE matched = scan_sub(obj, re, 1);
193
+ mmap_data_t *hit;
194
+ if (NIL_P(matched))
195
+ return Qnil;
196
+ Data_Get_Struct(matched, mmap_data_t, hit); /* the match is itself a MmapScanner; its size is the match length */
197
+ return SIZET2NUM(hit->size);
198
+ }
199
+ /* MmapScanner#match?(re): like check (the pointer does not move), but returns the number of matched bytes, or nil if there is no match. */
200
+ static VALUE match_p(VALUE obj, VALUE re)
201
+ {
202
+ VALUE matched = scan_sub(obj, re, 0);
203
+ mmap_data_t *hit;
204
+ if (NIL_P(matched))
205
+ return Qnil;
206
+ Data_Get_Struct(matched, mmap_data_t, hit); /* the match is itself a MmapScanner; its size is the match length */
207
+ return SIZET2NUM(hit->size);
208
+ }
209
+ /* MmapScanner#peek(size): up to size bytes starting at the scan pointer, as a new MmapScanner; the pointer does not move. */
210
+ static VALUE peek(VALUE obj, VALUE size)
211
+ {
212
+ mmap_data_t *region;
213
+ size_t wanted = NUM2SIZET(size), remaining;
214
+ Data_Get_Struct(obj, mmap_data_t, region);
215
+ remaining = region->size - region->pos; /* bytes left after the scan pointer */
216
+ if (remaining < wanted) wanted = remaining;
217
+ return rb_funcall(cMmapScanner, rb_intern("new"), 3, obj, SIZET2NUM(region->pos), SIZET2NUM(wanted));
218
+ }
219
+ /* MmapScanner#eos?: true once the scan pointer has reached the end of the region, false while bytes remain. */
220
+ static VALUE eos_p(VALUE obj)
221
+ {
222
+ mmap_data_t *region;
223
+ Data_Get_Struct(obj, mmap_data_t, region);
224
+ return region->pos < region->size ? Qfalse : Qtrue;
225
+ }
226
+ /* MmapScanner#rest: everything from the scan pointer onwards, as a new MmapScanner built by new(self, pos). */
227
+ static VALUE rest(VALUE obj)
228
+ {
229
+ mmap_data_t *region;
230
+ Data_Get_Struct(obj, mmap_data_t, region); /* only the current scan pointer is needed from the struct */
231
+ return rb_funcall(cMmapScanner, rb_intern("new"), 2, obj, SIZET2NUM(region->pos));
232
+ }
233
+ /* Extension entry point: defines class MmapScanner under Object and registers its allocator and instance methods. */
234
+ void Init_mmapscanner(void)
235
+ {
236
+ cMmapScanner = rb_define_class("MmapScanner", rb_cObject);
237
+ rb_define_alloc_func(cMmapScanner, allocate);
238
+ rb_define_method(cMmapScanner, "initialize", initialize, -1); /* -1: receives argc/argv (1 required + 2 optional arguments) */
239
+ rb_define_method(cMmapScanner, "size", size, 0);
240
+ rb_define_method(cMmapScanner, "length", size, 0); /* same C function as "size" */
241
+ rb_define_method(cMmapScanner, "to_s", to_s, 0);
242
+ rb_define_method(cMmapScanner, "slice", slice, 2);
243
+ // rb_define_method(cMmapScanner, "[]", slice, 2);
244
+ rb_define_method(cMmapScanner, "inspect", inspect, 0);
245
+ rb_define_method(cMmapScanner, "pos", pos, 0);
246
+ rb_define_method(cMmapScanner, "pos=", set_pos, 1);
247
+ rb_define_method(cMmapScanner, "scan", scan, 1); /* scan, check, skip and match? all go through scan_sub */
248
+ rb_define_method(cMmapScanner, "check", check, 1);
249
+ rb_define_method(cMmapScanner, "skip", skip, 1);
250
+ rb_define_method(cMmapScanner, "match?", match_p, 1);
251
+ rb_define_method(cMmapScanner, "peek", peek, 1);
252
+ rb_define_method(cMmapScanner, "eos?", eos_p, 0);
253
+ rb_define_method(cMmapScanner, "rest", rest, 0);
254
+ }
data/spec/mmapscanner_spec.rb ADDED
@@ -0,0 +1,141 @@
1
+ require 'tempfile'
2
+
3
+ $LOAD_PATH.unshift "#{File.dirname __FILE__}/../ext"
4
+ require 'mmapscanner'
5
+
6
+ describe MmapScanner do
7
+ before do
8
+ tmpf = Tempfile.new 'mmapscanner'
9
+ tmpf.write '0123456789'*1000
10
+ @file = File.open(tmpf.path)
11
+ end
12
+ subject{MmapScanner.new(@file)}
13
+ it '#size returns size of file' do
14
+ subject.size.should == 10000
15
+ end
16
+ it '#to_s returns contents of file' do
17
+ subject.to_s.should == '0123456789'*1000
18
+ end
19
+ describe '#slice' do
20
+ it 'returns MmapScanner' do
21
+ subject.slice(10, 100).should be_instance_of MmapScanner
22
+ end
23
+ end
24
+ it '#inspect returns "#<MmapScanner>"' do
25
+ subject.inspect.should == '#<MmapScanner>'
26
+ end
27
+ it '#pos returns current position' do
28
+ subject.pos.should == 0
29
+ subject.scan(/.../)
30
+ subject.pos.should == 3
31
+ end
32
+ describe '#pos=' do
33
+ it 'change current position' do
34
+ subject.pos = 100
35
+ subject.pos.should == 100
36
+ end
37
+ it 'raise error when negative value' do
38
+ expect{subject.pos = -1}.to raise_error(RangeError, 'out of range: -1')
39
+ end
40
+ it 'raise error when over size' do
41
+ expect{subject.pos = 10001}.to raise_error(RangeError, 'out of range: 10001 > 10000')
42
+ expect{subject.pos = 20000}.to raise_error(RangeError, 'out of range: 20000 > 10000')
43
+ end
44
+ end
45
+ describe '#scan' do
46
+ it 'returns matched data as MmapScanner' do
47
+ ret = subject.scan(/\d{10}/)
48
+ ret.class.should == MmapScanner
49
+ ret.to_s.should == '0123456789'
50
+ end
51
+ it 'returns nil if not matched' do
52
+ subject.scan(/123/).should be_nil
53
+ end
54
+ it 'forward current position' do
55
+ subject.scan(/\d{10}/)
56
+ subject.pos.should == 10
57
+ end
58
+ end
59
+ describe '#check' do
60
+ it 'returns matched data as MmapScanner' do
61
+ ret = subject.check(/\d{10}/)
62
+ ret.class.should == MmapScanner
63
+ ret.to_s.should == '0123456789'
64
+ end
65
+ it 'returns nil if not matched' do
66
+ subject.check(/123/).should be_nil
67
+ end
68
+ it 'do not forward current position' do
69
+ ret = subject.check(/\d{10}/)
70
+ subject.pos.should == 0
71
+ end
72
+ end
73
+ describe '#skip' do
74
+ it 'returns length of matched data' do
75
+ subject.skip(/\d{10}/).should == 10
76
+ end
77
+ it 'returns nil if not matched' do
78
+ subject.skip(/123/).should be_nil
79
+ end
80
+ it 'forward current position' do
81
+ subject.skip(/\d{10}/)
82
+ subject.pos.should == 10
83
+ end
84
+ end
85
+ describe '#match?' do
86
+ it 'returns length of matched data' do
87
+ subject.match?(/\d{10}/).should == 10
88
+ end
89
+ it 'returns nil if not matched' do
90
+ subject.match?(/123/).should be_nil
91
+ end
92
+ it 'do not forward current position' do
93
+ subject.match?(/\d{10}/)
94
+ subject.pos.should == 0
95
+ end
96
+ end
97
+ describe '#peek' do
98
+ it 'returns MmapScanner' do
99
+ subject.peek(10).should be_instance_of MmapScanner
100
+ end
101
+ it 'do not forward current position' do
102
+ subject.peek(10)
103
+ subject.pos.should == 0
104
+ end
105
+ end
106
+ describe '#eos?' do
107
+ it 'returns true if eos' do
108
+ subject.pos = 10000
109
+ subject.eos?.should == true
110
+ end
111
+ it 'returns false if not eos' do
112
+ subject.pos = 9999
113
+ subject.eos?.should == false
114
+ end
115
+ end
116
+ describe '#rest' do
117
+ it 'returns rest data as MmapScanner' do
118
+ subject.pos = 9997
119
+ ret = subject.rest
120
+ ret.should be_instance_of MmapScanner
121
+ ret.to_s.should == '789'
122
+ end
123
+ end
124
+ describe '.new with position' do
125
+ it '#size is length of rest data' do
126
+ MmapScanner.new(@file, 4096).size.should == 10000-4096
127
+ end
128
+ it 'raise error when invalid position' do
129
+ expect{MmapScanner.new(@file, 4095)}.to raise_error(Errno::EINVAL)
130
+ end
131
+ end
132
+ describe '.new with length' do
133
+ subject{MmapScanner.new(@file, nil, 10)}
134
+ it '#size is specified size' do
135
+ subject.size.should == 10
136
+ end
137
+ it 'raise error when negative' do
138
+ expect{MmapScanner.new(@file, nil, -1)}.to raise_error(RangeError, 'length out of range: -1')
139
+ end
140
+ end
141
+ end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mmapscanner
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: 3
5
+ version: 0.1a
6
+ platform: ruby
7
+ authors:
8
+ - TOMITA Masahiro
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-02-05 00:00:00 +09:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description:
18
+ email: tommy@tmtm.org
19
+ executables: []
20
+
21
+ extensions:
22
+ - ext/extconf.rb
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - README.md
27
+ - ext/mmapscanner.c
28
+ - spec/mmapscanner_spec.rb
29
+ - ext/extconf.rb
30
+ has_rdoc: true
31
+ homepage: http://github.com/tmtm/mmapscanner
32
+ licenses:
33
+ - Ruby's
34
+ post_install_message:
35
+ rdoc_options: []
36
+
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: 1.9.2
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ">"
49
+ - !ruby/object:Gem::Version
50
+ version: 1.3.1
51
+ requirements: []
52
+
53
+ rubyforge_project:
54
+ rubygems_version: 1.5.0
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: MmapScanner is like StringScanner, but it uses mmap(2)-ed data
58
+ test_files:
59
+ - spec/mmapscanner_spec.rb