rsec-ext 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,47 @@
1
module Rsec #:nodoc:

  # Parser for a fixed (literal) string. Its _parse method is replaced by the
  # C extension to make normal string parsing faster.
  class FixString < Unary
    # Build an UntilPattern parser from the escaped literal.
    # The optional block is attached to the result through +map+.
    def until &p
      UntilPattern[Regexp.new Regexp.escape some()].map &p
    end

    # Build a Pattern parser that matches the literal between word boundaries.
    # The optional block is attached to the result through +map+.
    def as_word &p
      # forward the block the same way #until does
      Pattern[/\b#{Regexp.escape some}\b/].map &p
    end

    # wrap with optional space by default
    #
    # +skip+ is turned into a skip parser and placed on both sides of the
    # word-bounded literal; only the literal's result (index 1) is kept.
    # The optional block is attached to the result through +map+.
    def wrap skip=/\s*/, &p
      skip = Rsec.try_skip_pattern Rsec.make_parser skip
      # the block used to be accepted but silently dropped
      SeqOne[[skip, Pattern[/\b#{Regexp.escape some}\b/], skip], 1].map &p
    end
  end

  # optimize one_of() for byte-only string
  class OneOfByte < OneOf
  end

  # optimize one_of_() for byte-only string
  class OneOfByte_ < OneOf_
  end

  # overwrite prim initializer: just store both arguments in left / right
  [PDouble, PInt32, PUnsignedInt32].each do |k|
    k.send :define_method, :initialize, ->l, r{
      self.left = l
      self.right = r
    }
  end
end
36
+
37
+ # require the so
38
+ require "rsec/predef"
39
+
40
class String
  # Overwrite the string-to-parser transformer so that a String becomes a
  # ::Rsec::FixString parser. +expects+ is forwarded to +fail+ and the
  # optional block to +map+.
  define_method(::Rsec::TO_PARSER_METHOD) do |*expects, &p|
    ::Rsec::FixString[self].fail(*expects).map(&p)
  end
end
47
+
@@ -0,0 +1,26 @@
1
+ require "mkmf"
2
+ require "fileutils"
3
+
4
# No native extension for the platforms matched below: write an empty
# Makefile so the build step has nothing to do, then stop.
if RUBY_PLATFORM =~ /jruby|mac|rubinius/
  File.open "#{File.dirname(__FILE__)}/Makefile", 'w' do |f|
  end
  exit 0
end

# No native extension for Windows without a C tool chain: when neither make
# nor nmake can be run, leave empty make.bat / nmake.bat stubs instead of a
# Makefile.
make_bat = "#{File.dirname(__FILE__)}/make.bat"
nmake_bat = "#{File.dirname(__FILE__)}/nmake.bat"
begin
  # Kernel#system reports failure through its return value (false / nil) and
  # does not raise, so the result has to be checked explicitly.
  unless system('make -v') || system('nmake /?')
    raise "neither make nor nmake could be run"
  end
  FileUtils.rm_f make_bat
  FileUtils.rm_f nmake_bat
  create_makefile 'predef'
rescue => ex
  puts "no make or nmake"
  File.open make_bat, 'w' do |f|
  end
  File.open nmake_bat, 'w' do |f|
  end
end
26
+
@@ -0,0 +1,448 @@
1
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ruby.h>
4
+
5
+
6
+ // -----------------------------------------------------------------------------
7
+ // globals
8
+
9
+
10
+ static VALUE invalid;
11
+ static ID ID_parse;
12
+
13
+ struct strscanner {
14
+ unsigned long flags;
15
+ VALUE str;
16
+ long prev;
17
+ long curr;
18
+ };
19
+
20
+ static VALUE call_parse(VALUE parser, VALUE ctx) {
21
+ return rb_funcall2(parser, ID_parse, 1, &ctx);
22
+ }
23
+
24
+
25
+ // -----------------------------------------------------------------------------
26
+ // predefined number parser
27
+
28
+
29
// Return 1 when the text starts with a "0x" / "0X" prefix, 0 otherwise.
// The second char is only inspected when the first one is '0'.
static int is_hex(char* pointer) {
    return pointer[0] == '0' && (pointer[1] == 'x' || pointer[1] == 'X');
}
35
+
36
+ #define DEFINE_PARSER(parser_name, res_type, float_parse_function, int_parse_function, convert_macro, is_floating) \
37
+ static VALUE parser_name(VALUE self, VALUE ctx) {\
38
+ char* pointer;\
39
+ char* tail;\
40
+ struct strscanner* ss;\
41
+ char first_char;\
42
+ VALUE* data = RSTRUCT_PTR(self);\
43
+ int limit;\
44
+ res_type res;\
45
+ Data_Get_Struct(ctx, struct strscanner, ss);\
46
+ limit = RSTRING_LEN(ss->str);\
47
+ if (ss->curr >= limit) return invalid;\
48
+ pointer = RSTRING_PTR(ss->str) + ss->curr;\
49
+ first_char = pointer[0];\
50
+ if (isspace(first_char)) return invalid;\
51
+ switch(data[0]) {\
52
+ case INT2FIX(0):\
53
+ if (first_char == '+' || first_char == '-') return invalid;\
54
+ break;\
55
+ case INT2FIX(1):\
56
+ if (first_char == '+') return invalid;\
57
+ break;\
58
+ case INT2FIX(2):\
59
+ if (first_char == '-') return invalid;\
60
+ break;\
61
+ }\
62
+ if (is_floating) {\
63
+ char* hex_check_ptr = pointer;\
64
+ if (first_char == '+' || first_char == '-') hex_check_ptr++;\
65
+ if (data[1] == Qtrue) /* true: hex */ \
66
+ if (! is_hex(hex_check_ptr))\
67
+ return invalid;\
68
+ if (data[1] == Qfalse) /* false: decimal */ \
69
+ if (is_hex(hex_check_ptr))\
70
+ return invalid;\
71
+ res = float_parse_function(pointer, &tail);\
72
+ } else {\
73
+ res = int_parse_function(pointer, &tail, FIX2INT(data[1]));\
74
+ }\
75
+ if (tail == pointer) {\
76
+ return invalid;\
77
+ } else {\
78
+ int distance = tail - pointer; /* tail points to the next char of the last char of the number */ \
79
+ if (ss->curr + distance > limit) {\
80
+ return invalid;\
81
+ } else if (errno == ERANGE) { /* out of range error */ \
82
+ return invalid;\
83
+ } else {\
84
+ ss->prev = ss->curr;\
85
+ ss->curr += distance;\
86
+ return convert_macro(res);\
87
+ }\
88
+ }\
89
+ }
90
+
91
+ // stubs for unified macro
92
+ #define int_stub strtol
93
+ #define float_stub strtod
94
+
95
+ DEFINE_PARSER(parse_double, double, strtod, int_stub, DBL2NUM, 1);
96
+ // XXX it is hard for pure ruby to determine single precision
97
+ DEFINE_PARSER(parse_int32, long, float_stub, strtol, INT2NUM, 0);
98
+ DEFINE_PARSER(parse_unsigned_int32, unsigned long, float_stub, strtoul, UINT2NUM, 0);
99
+ // XXX VC has no strtoll / strtoull
100
+
101
+ #undef int_stub
102
+ #undef float_stub
103
+
104
+ #undef DEFINE_PARSER
105
+
106
+
107
+ // -----------------------------------------------------------------------------
108
+ // change most used combinators _parse methods to C
109
+
110
+
111
+ static VALUE parse_seq(VALUE self, VALUE ctx) {
112
+ VALUE arr = RSTRUCT_PTR(self)[0];
113
+ VALUE* parsers = RARRAY_PTR(arr);
114
+ int len = RARRAY_LEN(arr);
115
+ volatile VALUE ret = rb_ary_new2(len);
116
+ int i;
117
+ volatile VALUE res = 0;
118
+
119
+ // We can't benefit from loop unwinding -_-
120
+ for (i = 0; i < len; i++) {
121
+ res = call_parse(parsers[i], ctx);
122
+ if (res == invalid) return invalid;
123
+ rb_ary_push(ret, res);
124
+ }
125
+ return ret;
126
+ }
127
+
128
+ static VALUE parse_seq_one(VALUE self, VALUE ctx) {
129
+ VALUE arr = RSTRUCT_PTR(self)[0];
130
+ int idx = NUM2INT(RSTRUCT_PTR(self)[1]);
131
+ VALUE* parsers = RARRAY_PTR(arr);
132
+ int len = RARRAY_LEN(arr);
133
+ VALUE ret = invalid;
134
+ volatile VALUE res = 0;
135
+ int i;
136
+
137
+ // We can't benefit from loop unwinding -_-
138
+ for (i = 0; i < len; i++) {
139
+ res = call_parse(parsers[i], ctx);
140
+ if (res == invalid) return invalid;
141
+ if (i == idx) ret = res;
142
+ }
143
+ return ret;
144
+ }
145
+
146
+ static VALUE parse_seq_(VALUE self, VALUE ctx) {
147
+ VALUE* struct_ptr = RSTRUCT_PTR(self);
148
+ VALUE first = struct_ptr[0];
149
+ volatile VALUE res = call_parse(first, ctx);
150
+ if (res == invalid) {
151
+ return invalid;
152
+ } else {
153
+ VALUE* rest = RARRAY_PTR(struct_ptr[1]);
154
+ VALUE skipper = struct_ptr[2];
155
+ int len = RARRAY_LEN(struct_ptr[1]);
156
+ volatile VALUE ret = rb_ary_new2(len + 1);
157
+ int i;
158
+
159
+ rb_ary_push(ret, res);
160
+ for (i = 0; i < len; i++) {
161
+ res = call_parse(skipper, ctx);
162
+ if (res == invalid) return invalid;
163
+ res = call_parse(rest[i], ctx);
164
+ if (res == invalid) return invalid;
165
+ rb_ary_push(ret, res);
166
+ }
167
+ return ret;
168
+ }
169
+ }
170
+
171
+ static VALUE parse_seq_one_(VALUE self, VALUE ctx) {
172
+ VALUE* struct_ptr = RSTRUCT_PTR(self);
173
+ VALUE first = struct_ptr[0];
174
+ volatile VALUE res = call_parse(first, ctx);
175
+ volatile VALUE ret = 0;
176
+ if (res == invalid) {
177
+ return invalid;
178
+ } else {
179
+ VALUE* rest = RARRAY_PTR(struct_ptr[1]);
180
+ VALUE skipper = struct_ptr[2];
181
+ int idx = NUM2INT(struct_ptr[3]);
182
+ int len = RARRAY_LEN(struct_ptr[1]);
183
+ int i;
184
+
185
+ if (0 == idx) ret = res;
186
+ idx--;
187
+ for (i = 0; i < len; i++) {
188
+ res = call_parse(skipper, ctx);
189
+ if (res == invalid) return invalid;
190
+ res = call_parse(rest[i], ctx);
191
+ if (res == invalid) return invalid;
192
+ if (i == idx) ret = res;
193
+ }
194
+ return ret;
195
+ }
196
+ }
197
+
198
+ static VALUE parse_branch(VALUE self, VALUE ctx) {
199
+ VALUE arr = RSTRUCT_PTR(self)[0];
200
+ VALUE* parsers = RARRAY_PTR(arr);
201
+ if (parsers) {
202
+ int len = RARRAY_LEN(arr);
203
+ int i, curr, prev;
204
+ struct strscanner* ss;
205
+ Data_Get_Struct(ctx, struct strscanner, ss);
206
+ curr = ss->curr;
207
+ prev = ss->prev;
208
+ for (i = 0; i < len; i++) {
209
+ VALUE res = call_parse(parsers[i], ctx);
210
+ if (res != invalid) return res;
211
+ ss->curr = curr;
212
+ ss->prev = prev;
213
+ }
214
+ return invalid;
215
+ } else {
216
+ rb_raise(rb_eRuntimeError, "or is not an array!");
217
+ }
218
+ }
219
+
220
+
221
+ // -----------------------------------------------------------------------------
222
+ // fast string parser
223
+
224
+
225
+ static VALUE parse_fix_string(VALUE self, VALUE ctx) {
226
+ struct strscanner* ss;
227
+ int i, len;
228
+ char* s1; // pattern
229
+ char* s2;
230
+ Data_Get_Struct(ctx, struct strscanner, ss);
231
+ VALUE pattern = RSTRUCT_PTR(self)[0]; // hack for self.some()
232
+ len = RSTRING_LEN(pattern);
233
+ if (ss->curr + len > RSTRING_LEN(ss->str))
234
+ return invalid;
235
+ s1 = RSTRING_PTR(pattern);
236
+ s2 = RSTRING_PTR(ss->str) + ss->curr;
237
+ for (i = 0; i < len; i++) {
238
+ if (s1[i] != s2[i])
239
+ return invalid;
240
+ }
241
+ ss->prev = ss->curr;
242
+ ss->curr += len;
243
+ return pattern; // self.some() is already frozen
244
+ }
245
+
246
+ static VALUE parse_one_of_byte(VALUE self, VALUE ctx) {
247
+ VALUE bytes = RSTRUCT_PTR(self)[0];
248
+ char* ptr = RSTRING_PTR(bytes);
249
+ int len = RSTRING_LEN(bytes);
250
+ struct strscanner* ss;
251
+ int limit, i;
252
+ char chr;
253
+
254
+ Data_Get_Struct(ctx, struct strscanner, ss);
255
+ limit = RSTRING_LEN(ss->str);
256
+ if (ss->curr >= limit) return invalid;
257
+ chr = RSTRING_PTR(ss->str)[ss->curr];
258
+ for (i = 0; i < len; i++) {
259
+ if (chr == ptr[i]) {
260
+ ss->curr ++;
261
+ char ret[1] = { chr };
262
+ return rb_str_new(ret, 1);
263
+ }
264
+ }
265
+ return invalid;
266
+ }
267
+
268
+ static VALUE parse_one_of_byte_(VALUE self, VALUE ctx) {
269
+ VALUE bytes = RSTRUCT_PTR(self)[0];
270
+ char* bytes_ptr = RSTRING_PTR(bytes);
271
+ int len = RSTRING_LEN(bytes);
272
+ struct strscanner* ss;
273
+ int limit, i;
274
+ char chr;
275
+ char* ptr;
276
+
277
+ Data_Get_Struct(ctx, struct strscanner, ss);
278
+ limit = RSTRING_LEN(ss->str);
279
+ ptr = RSTRING_PTR(ss->str);
280
+
281
+ // skip space
282
+ for(;;) {
283
+ // it is sure invalid because char cannot be epsilon
284
+ if (ss->curr >= limit) return invalid;
285
+ if (! isspace(ptr[ss->curr])) break;
286
+ ss->curr ++;
287
+ }
288
+ chr = ptr[ss->curr];
289
+ for (i = 0; i < len; i++) {
290
+ if (chr == bytes_ptr[i]) {
291
+ ss->curr ++;
292
+ // skip space
293
+ for (;;) {
294
+ if (ss->curr >= limit) break; // still valid
295
+ if (! isspace(ptr[ss->curr])) break;
296
+ ss->curr ++;
297
+ }
298
+ char ret[1] = { chr };
299
+ return rb_str_new(ret, 1);
300
+ }
301
+ }
302
+ return invalid;
303
+ }
304
+
305
+
306
+ // -----------------------------------------------------------------------------
307
+ // other
308
+
309
+
310
+ // keep =
311
+ // 1: keep inter only
312
+ // 2: keep token only
313
+ // 3: keep both
314
+ static VALUE proto_parse_join(VALUE self, VALUE ctx, int keep) {
315
+ VALUE token = RSTRUCT_PTR(self)[0];
316
+ VALUE inter = RSTRUCT_PTR(self)[1];
317
+ struct strscanner* ss;
318
+ volatile VALUE i = 0;
319
+ volatile VALUE t = 0;
320
+ volatile VALUE node = 0; // result
321
+ int save_point;
322
+
323
+ // pure translation of ruby code
324
+ t = call_parse(token, ctx);
325
+ if (t == invalid) return t;
326
+ node = rb_ary_new();
327
+ if (keep & 2)
328
+ rb_ary_push(node, t);
329
+
330
+ Data_Get_Struct(ctx, struct strscanner, ss);
331
+ for(;;) {
332
+ save_point = ss->curr;
333
+ i = call_parse(inter, ctx);
334
+ if (i == invalid) {
335
+ ss->curr = save_point;
336
+ break;
337
+ }
338
+ t = call_parse(token, ctx);
339
+ if (t == invalid) {
340
+ ss->curr = save_point;
341
+ break;
342
+ }
343
+ if (save_point == ss->curr) break;
344
+ if (keep & 1) rb_ary_push(node, i);
345
+ if (keep & 2) rb_ary_push(node, t);
346
+ }
347
+ return node;
348
+ }
349
+
350
+ static VALUE parse_join(VALUE self, VALUE ctx) {
351
+ return proto_parse_join(self, ctx, 3);
352
+ }
353
+
354
+ static VALUE parse_join_even(VALUE self, VALUE ctx) {
355
+ return proto_parse_join(self, ctx, 2);
356
+ }
357
+
358
+ static VALUE parse_join_odd(VALUE self, VALUE ctx) {
359
+ return proto_parse_join(self, ctx, 1);
360
+ }
361
+
362
+ static VALUE parse_map(VALUE self, VALUE ctx) {
363
+ VALUE* data = RSTRUCT_PTR(self);
364
+ VALUE res = call_parse(data[0], ctx);
365
+ if (res == invalid) return res;
366
+ return rb_proc_call(data[1], rb_ary_new3(1, res));
367
+ }
368
+
369
+ // function like ParseContext.on_fail, but don't re-define it
370
+ static VALUE parse_context_on_fail(VALUE self, VALUE mask) {
371
+ struct strscanner* ss = 0;
372
+ Data_Get_Struct(self, struct strscanner, ss);
373
+ if (ss) {
374
+ int pos = ss->curr;
375
+ int last_fail_pos = NUM2INT(rb_ivar_get(self, rb_intern("@last_fail_pos")));
376
+ if (pos > last_fail_pos) {
377
+ volatile VALUE new_fail_pos = INT2NUM(pos);
378
+ rb_ivar_set(self, rb_intern("@last_fail_pos"), INT2NUM(pos));
379
+ rb_ivar_set(self, rb_intern("@last_fail_mask"), mask);
380
+ } else if (pos == last_fail_pos) {
381
+ volatile VALUE last_fail_mask = rb_ivar_get(self, rb_intern("@last_fail_mask"));
382
+ last_fail_mask = rb_funcall(last_fail_mask, rb_intern("|"), 1, mask);
383
+ rb_ivar_set(self, rb_intern("@last_fail_mask"), last_fail_mask);
384
+ }
385
+ }
386
+ return Qnil;
387
+ }
388
+
389
+ static VALUE parse_fail(VALUE self, VALUE ctx) {
390
+ VALUE left = RSTRUCT_PTR(self)[0];
391
+ VALUE right = RSTRUCT_PTR(self)[1];
392
+ volatile VALUE res = call_parse(left, ctx);
393
+ if (res == invalid) {
394
+ parse_context_on_fail(ctx, right);
395
+ }
396
+ return res;
397
+ }
398
+
399
+
400
+ // -----------------------------------------------------------------------------
401
+ // init
402
+
403
+ #ifdef __cplusplus
404
+ extern "C"
405
+ #endif
406
+ void
407
+ #ifdef _WIN32
408
+ __declspec(dllexport)
409
+ #endif
410
+ Init_predef() {
411
+ VALUE rsec = rb_define_module("Rsec");
412
+ VALUE predef = rb_define_class_under(rsec, "Predef", rb_cObject);
413
+ invalid = rb_const_get(rsec, rb_intern("INVALID"));
414
+ ID_parse = rb_intern("_parse");
415
+ rb_include_module(predef, rsec);
416
+
417
+ // -----------------------------------------------------------------------------
418
+ // redefine parse methods
419
+
420
+ # define REDEFINE(klass_name, method) \
421
+ rb_define_method(rb_const_get(rsec, rb_intern(klass_name)), "_parse", method, 1)
422
+
423
+ REDEFINE("PDouble", parse_double);
424
+ // REDEFINE("PFloat", parse_float);
425
+ REDEFINE("PInt32", parse_int32);
426
+ // REDEFINE("PInt64", parse_int64);
427
+ REDEFINE("PUnsignedInt32", parse_unsigned_int32);
428
+ // REDEFINE("PUnsignedInt64", parse_unsigned_int64);
429
+
430
+ REDEFINE("Seq", parse_seq);
431
+ REDEFINE("Seq_", parse_seq_);
432
+ REDEFINE("SeqOne", parse_seq_one);
433
+ REDEFINE("SeqOne_", parse_seq_one_);
434
+ REDEFINE("Branch", parse_branch);
435
+
436
+ REDEFINE("FixString", parse_fix_string);
437
+ REDEFINE("OneOfByte", parse_one_of_byte);
438
+ REDEFINE("OneOfByte_", parse_one_of_byte_);
439
+
440
+ REDEFINE("Join", parse_join);
441
+ REDEFINE("JoinEven", parse_join_even);
442
+ REDEFINE("JoinOdd", parse_join_odd);
443
+ REDEFINE("Map", parse_map);
444
+ REDEFINE("Fail", parse_fail);
445
+
446
+ # undef REDEFINE
447
+ }
448
+
@@ -0,0 +1 @@
1
+ As Ruby's
@@ -0,0 +1,30 @@
1
+ == Parser / Regexp Combinator for Ruby.
2
+
3
+ Easier and faster than treetop / rex+racc.
4
+
5
+ It works with Ruby 1.9 only.
6
+
7
+ == License
8
+
9
+ As Ruby's
10
+
11
+ == Install
12
+
13
+ The pure Ruby gem is fast enough (more than 10x faster than treetop-generated code):
14
+
15
+ gem in rsec
16
+
17
+ For extreme performance under C Ruby:
18
+
19
+ gem in rsec-ext
20
+
21
+ It is about 30% faster than Haskell Parsec in the benchmark.
22
+
23
+ == Doc
24
+
25
+ http://rsec.heroku.com
26
+
27
+ == Code
28
+
29
+ http://github.com/luikore/rsec/tree/master
30
+
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rsec-ext
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - NS
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-02-24 00:00:00.000000000 +08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rsec
17
+ requirement: &24868296 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - =
21
+ - !ruby/object:Gem::Version
22
+ version: 0.3.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *24868296
26
+ description: Easy and extremely fast dynamic PEG parser combinator.
27
+ email:
28
+ executables: []
29
+ extensions:
30
+ - ext/rsec/extconf.rb
31
+ extra_rdoc_files:
32
+ - readme.rdoc
33
+ files:
34
+ - license.txt
35
+ - readme.rdoc
36
+ - ext/rsec/ext.rb
37
+ - ext/rsec/predef.c
38
+ - ext/rsec/extconf.rb
39
+ has_rdoc: true
40
+ homepage: http://rsec.heroku.com
41
+ licenses: []
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - ext
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: 1.9.1
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.5.2
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Extreme Fast Parser Combinator for Ruby, the C extension part
64
+ test_files: []