rsec-ext 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
1
+ module Rsec #:nodoc:
2
+
3
+ # make normal string parsing faster
4
+ class FixString < Unary
5
+ def until &p
6
+ UntilPattern[Regexp.new Regexp.escape some()].map &p
7
+ end
8
+
9
+ def as_word &p
10
+ Pattern[/\b#{Regexp.escape some}\b/].map p
11
+ end
12
+
13
+ # wrap with optional space by default
14
+ def wrap skip=/\s*/, &p
15
+ skip = Rsec.try_skip_pattern Rsec.make_parser skip
16
+ SeqOne[[skip, Pattern[/\b#{Regexp.escape some}\b/], skip], 1]
17
+ end
18
+ end
19
+
20
+   # optimize one_of() for byte-only string; the C extension installs parse_one_of_byte as this class's _parse
21
+   class OneOfByte < OneOf
22
+   end
23
+
24
+   # optimize one_of_() for byte-only string; the C extension installs parse_one_of_byte_ (which also skips whitespace around the byte) as its _parse
25
+   class OneOfByte_ < OneOf_
26
+   end
27
+
28
+ # overwrite prim initializer
29
+ [PDouble, PInt32, PUnsignedInt32].each do |k|
30
+ k.send :define_method, :initialize, ->l, r{
31
+ self.left = l
32
+ self.right = r
33
+ }
34
+ end
35
+ end
36
+
37
+ # require the so
38
+ require "rsec/predef"
39
+
40
+ class String
41
+   # overwrite string-to-parser transformer: wraps the receiver in a FixString, passes expects to #fail and the optional block to #map
42
+   define_method ::Rsec::TO_PARSER_METHOD, ->(*expects, &p){
43
+     parser = ::Rsec::FixString[self]
44
+     parser.fail(*expects).map &p
45
+   }
46
+ end
47
+
@@ -0,0 +1,26 @@
1
+ require "mkmf"
2
+ require "fileutils"
3
+
4
+ # no make ext for other ruby versions
5
+ if RUBY_PLATFORM =~ /jruby|mac|rubinius/
6
+ File.open "#{File.dirname(__FILE__)}/Makefile", 'w' do |f|
7
+ end
8
+ exit 0
9
+ end
10
+
11
+ # no make ext for windows without C compiler
12
+ make_bat = "#{File.dirname(__FILE__)}/make.bat"
13
+ nmake_bat = "#{File.dirname(__FILE__)}/nmake.bat"
14
+ begin
15
+ system 'make -v' rescue system 'nmake /?'
16
+ FileUtils.rm_f make_bat
17
+ FileUtils.rm_f nmake_bat
18
+ create_makefile 'predef'
19
+ rescue => ex
20
+ puts "no make or nmake"
21
+ File.open make_bat, 'w' do |f|
22
+ end
23
+ File.open nmake_bat, 'w' do |f|
24
+ end
25
+ end
26
+
@@ -0,0 +1,448 @@
1
+ #include <stdlib.h>
2
+ #include <ruby.h>
3
+ #include <stdio.h>
4
+
5
+
6
+ // -----------------------------------------------------------------------------
7
+ // globals
8
+
9
+
10
+ static VALUE invalid; // Rsec::INVALID, the value every parser returns on failure (assigned in Init_predef)
11
+ static ID ID_parse; // interned "_parse" method id (assigned in Init_predef)
12
+
13
+ struct strscanner { // scanner state read from the parse context via Data_Get_Struct; presumably mirrors the head of Ruby's StringScanner struct -- TODO confirm layout
14
+   unsigned long flags;
15
+   VALUE str; // string being parsed
16
+   long prev; // parsers store the old curr here before advancing
17
+   long curr; // current byte offset into str
18
+ };
19
+
20
+ static VALUE call_parse(VALUE parser, VALUE ctx) { // invoke parser._parse(ctx) through Ruby method dispatch and return its result
21
+   return rb_funcall2(parser, ID_parse, 1, &ctx);
22
+ }
23
+
24
+
25
+ // -----------------------------------------------------------------------------
26
+ // predefined number parser
27
+
28
+
29
+ static int is_hex(char* pointer) {
30
+   /* 1 when the text starts with "0x" or "0X", otherwise 0; pointer[1] is only read after pointer[0] matched '0' */
31
+   return pointer[0] == '0'
32
+     && (pointer[1] == 'x'
33
+       || pointer[1] == 'X');
34
+ }
35
+
36
+ #define DEFINE_PARSER(parser_name, res_type, float_parse_function, int_parse_function, convert_macro, is_floating) \
37
+ static VALUE parser_name(VALUE self, VALUE ctx) {\
38
+   char *pointer, *tail; /* start of the number text / first char after the parsed number */ \
39
+   struct strscanner* ss;\
40
+   char first_char;\
41
+   VALUE* data = RSTRUCT_PTR(self); /* data[0]: sign rule; data[1]: hex flag (floating) or radix (integer) */ \
42
+   long limit; /* long, like RSTRING_LEN and ss->curr, so large offsets are not truncated */ \
43
+   res_type res;\
44
+   Data_Get_Struct(ctx, struct strscanner, ss);\
45
+   limit = RSTRING_LEN(ss->str);\
46
+   if (ss->curr >= limit) return invalid;\
47
+   pointer = RSTRING_PTR(ss->str) + ss->curr;\
48
+   first_char = pointer[0];\
49
+   if (isspace((unsigned char)first_char)) return invalid; /* strto* would skip leading blanks; cast avoids a negative argument */ \
50
+   switch(data[0]) {\
51
+     case INT2FIX(0): /* no sign allowed */ \
52
+       if (first_char == '+' || first_char == '-') return invalid;\
53
+       break;\
54
+     case INT2FIX(1): /* '+' not allowed */ \
55
+       if (first_char == '+') return invalid;\
56
+       break;\
57
+     case INT2FIX(2): /* '-' not allowed */ \
58
+       if (first_char == '-') return invalid;\
59
+       break;\
60
+   }\
61
+   errno = 0; /* strto* never clears errno, so a stale ERANGE would reject a valid number below */ \
62
+   if (is_floating) {\
63
+     char* hex_check_ptr = pointer;\
64
+     if (first_char == '+' || first_char == '-') hex_check_ptr++;\
65
+     if (data[1] == Qtrue) /* true: hex */ \
66
+       if (! is_hex(hex_check_ptr))\
67
+         return invalid;\
68
+     if (data[1] == Qfalse) /* false: decimal */ \
69
+       if (is_hex(hex_check_ptr))\
70
+         return invalid;\
71
+     res = float_parse_function(pointer, &tail);\
72
+   } else {\
73
+     res = int_parse_function(pointer, &tail, FIX2INT(data[1]));\
74
+   }\
75
+   if (tail == pointer) {\
76
+     return invalid;\
77
+   } else {\
78
+     long distance = tail - pointer; /* tail points to the next char of the last char of the number */ \
79
+     if (ss->curr + distance > limit) {\
80
+       return invalid;\
81
+     } else if (errno == ERANGE) { /* out of range error */ \
82
+       return invalid;\
83
+     } else {\
84
+       ss->prev = ss->curr;\
85
+       ss->curr += distance;\
86
+       return convert_macro(res);\
87
+     }\
88
+   }\
89
+ }
90
+
91
+ // stubs for unified macro: fill the parse-function slot of the branch that is_floating disables, so both branches still compile
92
+ #define int_stub strtol
93
+ #define float_stub strtod
94
+
95
+ DEFINE_PARSER(parse_double, double, strtod, int_stub, DBL2NUM, 1);
96
+ // XXX it is hard for pure ruby to determine single precision
97
+ DEFINE_PARSER(parse_int32, long, float_stub, strtol, INT2NUM, 0); // NOTE(review): res is a long but INT2NUM takes an int -- values outside int range may be narrowed where long is 64-bit; confirm
98
+ DEFINE_PARSER(parse_unsigned_int32, unsigned long, float_stub, strtoul, UINT2NUM, 0);
99
+ // XXX VC has no strtoll / strtoull
100
+
101
+ #undef int_stub
102
+ #undef float_stub
103
+
104
+ #undef DEFINE_PARSER
105
+
106
+
107
+ // -----------------------------------------------------------------------------
108
+ // change most used combinators _parse methods to C
109
+
110
+
111
+ static VALUE parse_seq(VALUE self, VALUE ctx) { // Seq#_parse: run every sub-parser in order; returns the array of results, or invalid at the first failure
112
+   VALUE arr = RSTRUCT_PTR(self)[0]; // struct member 0: array of sub-parsers
113
+   VALUE* parsers = RARRAY_PTR(arr);
114
+   int len = RARRAY_LEN(arr);
115
+   volatile VALUE ret = rb_ary_new2(len); // volatile: presumably keeps the VALUE visible to the GC across the calls below -- TODO confirm
116
+   int i;
117
+   volatile VALUE res = 0;
118
+
119
+   // We can't benefit from loop unwinding -_-
120
+   for (i = 0; i < len; i++) {
121
+     res = call_parse(parsers[i], ctx);
122
+     if (res == invalid) return invalid; // the scanner position is not restored here
123
+     rb_ary_push(ret, res);
124
+   }
125
+   return ret;
126
+ }
127
+
128
+ static VALUE parse_seq_one(VALUE self, VALUE ctx) { // SeqOne#_parse: run every sub-parser in order but return only the result at index idx
129
+   VALUE arr = RSTRUCT_PTR(self)[0]; // struct member 0: array of sub-parsers
130
+   int idx = NUM2INT(RSTRUCT_PTR(self)[1]); // struct member 1: index of the result to keep
131
+   VALUE* parsers = RARRAY_PTR(arr);
132
+   int len = RARRAY_LEN(arr);
133
+   VALUE ret = invalid; // stays invalid if idx never equals a loop index
134
+   volatile VALUE res = 0;
135
+   int i;
136
+
137
+   // We can't benefit from loop unwinding -_-
138
+   for (i = 0; i < len; i++) {
139
+     res = call_parse(parsers[i], ctx);
140
+     if (res == invalid) return invalid;
141
+     if (i == idx) ret = res;
142
+   }
143
+   return ret;
144
+ }
145
+
146
+ static VALUE parse_seq_(VALUE self, VALUE ctx) { // Seq_#_parse: first parser, then (skipper, parser) pairs; returns the parser results without the skipper results
147
+   VALUE* struct_ptr = RSTRUCT_PTR(self); // members: [0] first parser, [1] array of remaining parsers, [2] skipper
148
+   VALUE first = struct_ptr[0];
149
+   volatile VALUE res = call_parse(first, ctx);
150
+   if (res == invalid) {
151
+     return invalid;
152
+   } else {
153
+     VALUE* rest = RARRAY_PTR(struct_ptr[1]);
154
+     VALUE skipper = struct_ptr[2];
155
+     int len = RARRAY_LEN(struct_ptr[1]);
156
+     volatile VALUE ret = rb_ary_new2(len + 1); // room for the first result plus one per remaining parser
157
+     int i;
158
+
159
+     rb_ary_push(ret, res);
160
+     for (i = 0; i < len; i++) {
161
+       res = call_parse(skipper, ctx); // skipper result is only checked for failure, never stored
162
+       if (res == invalid) return invalid;
163
+       res = call_parse(rest[i], ctx);
164
+       if (res == invalid) return invalid;
165
+       rb_ary_push(ret, res);
166
+     }
167
+     return ret;
168
+   }
169
+ }
170
+
171
+ static VALUE parse_seq_one_(VALUE self, VALUE ctx) { // SeqOne_#_parse: like parse_seq_ but returns only the result at index idx (0 = first parser)
172
+   VALUE* struct_ptr = RSTRUCT_PTR(self); // members: [0] first parser, [1] array of remaining parsers, [2] skipper, [3] index to keep
173
+   VALUE first = struct_ptr[0];
174
+   volatile VALUE res = call_parse(first, ctx);
175
+   volatile VALUE ret = 0;
176
+   if (res == invalid) {
177
+     return invalid;
178
+   } else {
179
+     VALUE* rest = RARRAY_PTR(struct_ptr[1]);
180
+     VALUE skipper = struct_ptr[2];
181
+     int idx = NUM2INT(struct_ptr[3]);
182
+     int len = RARRAY_LEN(struct_ptr[1]);
183
+     int i;
184
+
185
+     if (0 == idx) ret = res;
186
+     idx--; // shift so idx lines up with positions in rest
187
+     for (i = 0; i < len; i++) {
188
+       res = call_parse(skipper, ctx); // skipper result is only checked for failure
189
+       if (res == invalid) return invalid;
190
+       res = call_parse(rest[i], ctx);
191
+       if (res == invalid) return invalid;
192
+       if (i == idx) ret = res;
193
+     }
194
+     return ret;
195
+   }
196
+ }
197
+
198
+ static VALUE parse_branch(VALUE self, VALUE ctx) { // Branch#_parse: try each alternative in order and return the first result that is not invalid
199
+   VALUE arr = RSTRUCT_PTR(self)[0]; // struct member 0: array of alternative parsers
200
+   VALUE* parsers = RARRAY_PTR(arr);
201
+   if (parsers) {
202
+     int i, len = RARRAY_LEN(arr);
203
+     long curr, prev; // long like the strscanner fields, so saved offsets are not truncated
204
+     struct strscanner* ss;
205
+     Data_Get_Struct(ctx, struct strscanner, ss);
206
+     curr = ss->curr;
207
+     prev = ss->prev;
208
+     for (i = 0; i < len; i++) {
209
+       VALUE res = call_parse(parsers[i], ctx);
210
+       if (res != invalid) return res;
211
+       ss->curr = curr; // rewind the scanner before trying the next alternative
212
+       ss->prev = prev;
213
+     }
214
+     return invalid;
215
+   } else {
216
+     rb_raise(rb_eRuntimeError, "or is not an array!");
217
+   }
218
+ }
219
+
220
+
221
+ // -----------------------------------------------------------------------------
222
+ // fast string parser
223
+
224
+
225
+ static VALUE parse_fix_string(VALUE self, VALUE ctx) { // FixString#_parse: byte-compare the literal at the current position; returns the literal, or invalid
226
+   struct strscanner* ss;
227
+   long i, len; // long like RSTRING_LEN and ss->curr
228
+   char* s1; // pattern
229
+   char* s2; // input at the current position
230
+   VALUE pattern = RSTRUCT_PTR(self)[0]; // hack for self.some(); declared before any statement so C89 compilers accept it
231
+   Data_Get_Struct(ctx, struct strscanner, ss);
232
+   len = RSTRING_LEN(pattern);
233
+   if (ss->curr + len > RSTRING_LEN(ss->str))
234
+     return invalid;
235
+   s1 = RSTRING_PTR(pattern);
236
+   s2 = RSTRING_PTR(ss->str) + ss->curr;
237
+   for (i = 0; i < len; i++) {
238
+     if (s1[i] != s2[i])
239
+       return invalid;
240
+   }
241
+   ss->prev = ss->curr;
242
+   ss->curr += len;
243
+   return pattern; // self.some() is already frozen
244
+ }
245
+
246
+ static VALUE parse_one_of_byte(VALUE self, VALUE ctx) { // OneOfByte#_parse: accept one input byte if it occurs in the byte set; returns it as a 1-byte string, or invalid
247
+   VALUE bytes = RSTRUCT_PTR(self)[0]; // struct member 0: string holding the accepted bytes
248
+   char* ptr = RSTRING_PTR(bytes);
249
+   long len = RSTRING_LEN(bytes);
250
+   struct strscanner* ss;
251
+   long limit, i; // long like RSTRING_LEN and ss->curr
252
+   char chr;
253
+
254
+   Data_Get_Struct(ctx, struct strscanner, ss);
255
+   limit = RSTRING_LEN(ss->str);
256
+   if (ss->curr >= limit) return invalid;
257
+   chr = RSTRING_PTR(ss->str)[ss->curr];
258
+   for (i = 0; i < len; i++) {
259
+     if (chr == ptr[i]) {
260
+       ss->prev = ss->curr; // record the match start, as parse_fix_string and the number parsers do
261
+       ss->curr ++;
262
+       return rb_str_new(&chr, 1); // no mid-block declaration, so C89 compilers accept it
263
+     }
264
+   }
265
+   return invalid;
266
+ }
267
+
268
+ static VALUE parse_one_of_byte_(VALUE self, VALUE ctx) { // OneOfByte_#_parse: skip whitespace, accept one byte from the set, skip whitespace again; returns the byte as a string, or invalid
269
+   VALUE bytes = RSTRUCT_PTR(self)[0]; // struct member 0: string holding the accepted bytes
270
+   char* bytes_ptr = RSTRING_PTR(bytes);
271
+   long len = RSTRING_LEN(bytes);
272
+   struct strscanner* ss;
273
+   long limit, i; // long like RSTRING_LEN and ss->curr
274
+   char chr;
275
+   char* ptr;
276
+
277
+   Data_Get_Struct(ctx, struct strscanner, ss);
278
+   limit = RSTRING_LEN(ss->str);
279
+   ptr = RSTRING_PTR(ss->str);
280
+
281
+   // skip space
282
+   for(;;) {
283
+     // it is sure invalid because char cannot be epsilon
284
+     if (ss->curr >= limit) return invalid;
285
+     if (! isspace((unsigned char)ptr[ss->curr])) break;
286
+     ss->curr ++;
287
+   }
288
+   chr = ptr[ss->curr];
289
+   for (i = 0; i < len; i++) {
290
+     if (chr == bytes_ptr[i]) {
291
+       ss->curr ++;
292
+       // skip space
293
+       for (;;) {
294
+         if (ss->curr >= limit) break; // still valid
295
+         if (! isspace((unsigned char)ptr[ss->curr])) break;
296
+         ss->curr ++;
297
+       }
298
+       // build the result straight from chr: no mid-block declaration, so C89 compilers accept it
299
+       return rb_str_new(&chr, 1);
300
+     }
301
+   }
302
+   return invalid; // leading whitespace skipped above is not rewound here
303
+ }
304
+
305
+
306
+ // -----------------------------------------------------------------------------
307
+ // other
308
+
309
+
310
+ // keep = bit mask selecting what is pushed into the result array:
311
+ //   1: keep inter only
312
+ //   2: keep token only
313
+ //   3: keep both
314
+ static VALUE proto_parse_join(VALUE self, VALUE ctx, int keep) {
315
+   VALUE token = RSTRUCT_PTR(self)[0]; // struct member 0: element parser
316
+   VALUE inter = RSTRUCT_PTR(self)[1]; // struct member 1: separator parser
317
+   struct strscanner* ss;
318
+   volatile VALUE i = 0; // latest inter result
319
+   volatile VALUE t = 0; // latest token result
320
+   volatile VALUE node = 0; // result
321
+   int save_point; // NOTE(review): ss->curr is a long; int may truncate very large offsets -- confirm
322
+
323
+   // pure translation of ruby code
324
+   t = call_parse(token, ctx); // at least one token is required
325
+   if (t == invalid) return t;
326
+   node = rb_ary_new();
327
+   if (keep & 2)
328
+     rb_ary_push(node, t);
329
+
330
+   Data_Get_Struct(ctx, struct strscanner, ss);
331
+   for(;;) {
332
+     save_point = ss->curr; // position to rewind to if the next (inter, token) pair fails
333
+     i = call_parse(inter, ctx);
334
+     if (i == invalid) {
335
+       ss->curr = save_point;
336
+       break;
337
+     }
338
+     t = call_parse(token, ctx);
339
+     if (t == invalid) {
340
+       ss->curr = save_point; // also discards the inter that was just consumed
341
+       break;
342
+     }
343
+     if (save_point == ss->curr) break; // pair consumed nothing: stop instead of looping forever
344
+     if (keep & 1) rb_ary_push(node, i);
345
+     if (keep & 2) rb_ary_push(node, t);
346
+   }
347
+   return node;
348
+ }
349
+
350
+ static VALUE parse_join(VALUE self, VALUE ctx) { // Join#_parse: keep both token and inter results
351
+   return proto_parse_join(self, ctx, 3);
352
+ }
353
+
354
+ static VALUE parse_join_even(VALUE self, VALUE ctx) { // JoinEven#_parse: keep token results only
355
+   return proto_parse_join(self, ctx, 2);
356
+ }
357
+
358
+ static VALUE parse_join_odd(VALUE self, VALUE ctx) { // JoinOdd#_parse: keep inter results only
359
+   return proto_parse_join(self, ctx, 1);
360
+ }
361
+
362
+ static VALUE parse_map(VALUE self, VALUE ctx) { // Map#_parse: run the inner parser and, on success, return the proc's result for it
363
+   VALUE* data = RSTRUCT_PTR(self); // members: [0] inner parser, [1] proc
364
+   VALUE res = call_parse(data[0], ctx);
365
+   if (res == invalid) return res; // failures pass through without calling the proc
366
+   return rb_proc_call(data[1], rb_ary_new3(1, res)); // proc receives the result as its single argument
367
+ }
368
+
369
+ // function like ParseContext.on_fail, but don't re-define it: records the furthest failure position in @last_fail_pos and the mask(s) seen there in @last_fail_mask
370
+ static VALUE parse_context_on_fail(VALUE self, VALUE mask) {
371
+   struct strscanner* ss = 0;
372
+   Data_Get_Struct(self, struct strscanner, ss);
373
+   if (ss) {
374
+     long pos = ss->curr; // long like ss->curr, so large offsets are not truncated
375
+     long last_fail_pos = NUM2LONG(rb_ivar_get(self, rb_intern("@last_fail_pos")));
376
+     if (pos > last_fail_pos) { // failure further into the input: replace position and mask
377
+       volatile VALUE new_fail_pos = LONG2NUM(pos);
378
+       rb_ivar_set(self, rb_intern("@last_fail_pos"), new_fail_pos);
379
+       rb_ivar_set(self, rb_intern("@last_fail_mask"), mask);
380
+     } else if (pos == last_fail_pos) { // same position: combine masks with Ruby's | method
381
+       volatile VALUE last_fail_mask = rb_ivar_get(self, rb_intern("@last_fail_mask"));
382
+       last_fail_mask = rb_funcall(last_fail_mask, rb_intern("|"), 1, mask);
383
+       rb_ivar_set(self, rb_intern("@last_fail_mask"), last_fail_mask);
384
+     }
385
+   }
386
+   return Qnil;
387
+ }
388
+
389
+ static VALUE parse_fail(VALUE self, VALUE ctx) { // Fail#_parse: run the inner parser; on failure report this parser's mask to the context
390
+   VALUE left = RSTRUCT_PTR(self)[0]; // struct member 0: inner parser
391
+   VALUE right = RSTRUCT_PTR(self)[1]; // struct member 1: value passed as mask to parse_context_on_fail
392
+   volatile VALUE res = call_parse(left, ctx);
393
+   if (res == invalid) {
394
+     parse_context_on_fail(ctx, right);
395
+   }
396
+   return res; // the inner result is returned unchanged either way
397
+ }
398
+
399
+
400
+ // -----------------------------------------------------------------------------
401
+ // init
402
+
403
+ #ifdef __cplusplus
404
+ extern "C"
405
+ #endif
406
+ void
407
+ #ifdef _WIN32
408
+ __declspec(dllexport)
409
+ #endif
410
+ Init_predef() { // extension entry point: caches globals, then replaces _parse on the Rsec parser classes with the C functions above
411
+   VALUE rsec = rb_define_module("Rsec");
412
+   VALUE predef = rb_define_class_under(rsec, "Predef", rb_cObject);
413
+   invalid = rb_const_get(rsec, rb_intern("INVALID")); // Rsec::INVALID must already be defined when this runs
414
+   ID_parse = rb_intern("_parse");
415
+   rb_include_module(predef, rsec);
416
+
417
+   // -----------------------------------------------------------------------------
418
+   // redefine parse methods
419
+
420
+ # define REDEFINE(klass_name, method) \
421
+     rb_define_method(rb_const_get(rsec, rb_intern(klass_name)), "_parse", method, 1)
422
+
423
+   REDEFINE("PDouble", parse_double); // each named class must already exist under Rsec (looked up with rb_const_get)
424
+   // REDEFINE("PFloat", parse_float);
425
+   REDEFINE("PInt32", parse_int32);
426
+   // REDEFINE("PInt64", parse_int64);
427
+   REDEFINE("PUnsignedInt32", parse_unsigned_int32);
428
+   // REDEFINE("PUnsignedInt64", parse_unsigned_int64);
429
+
430
+   REDEFINE("Seq", parse_seq);
431
+   REDEFINE("Seq_", parse_seq_);
432
+   REDEFINE("SeqOne", parse_seq_one);
433
+   REDEFINE("SeqOne_", parse_seq_one_);
434
+   REDEFINE("Branch", parse_branch);
435
+
436
+   REDEFINE("FixString", parse_fix_string);
437
+   REDEFINE("OneOfByte", parse_one_of_byte);
438
+   REDEFINE("OneOfByte_", parse_one_of_byte_);
439
+
440
+   REDEFINE("Join", parse_join);
441
+   REDEFINE("JoinEven", parse_join_even);
442
+   REDEFINE("JoinOdd", parse_join_odd);
443
+   REDEFINE("Map", parse_map);
444
+   REDEFINE("Fail", parse_fail);
445
+
446
+ # undef REDEFINE
447
+ }
448
+
@@ -0,0 +1 @@
1
+ As Ruby's
@@ -0,0 +1,30 @@
1
+ == Parser / Regexp Combinator for Ruby.
2
+
3
+ Easier and faster than treetop / rex+racc.
4
+
5
+ It requires Ruby 1.9.1 or newer.
6
+
7
+ == License
8
+
9
+ As Ruby's
10
+
11
+ == Install
12
+
13
+ The pure Ruby gem is fast enough (more than 10 times faster than treetop-generated code):
14
+
15
+ gem in rsec
16
+
17
+ For extreme performance under C Ruby:
18
+
19
+ gem in rsec-ext
20
+
21
+ It is about 30% faster than Haskell Parsec in the benchmark.
22
+
23
+ == Doc
24
+
25
+ http://rsec.heroku.com
26
+
27
+ == Code
28
+
29
+ http://github.com/luikore/rsec/tree/master
30
+
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rsec-ext
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - NS
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-02-24 00:00:00.000000000 +08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rsec
17
+ requirement: &24868296 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - =
21
+ - !ruby/object:Gem::Version
22
+ version: 0.3.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *24868296
26
+ description: Easy and extremely fast dynamic PEG parser combinator.
27
+ email:
28
+ executables: []
29
+ extensions:
30
+ - ext/rsec/extconf.rb
31
+ extra_rdoc_files:
32
+ - readme.rdoc
33
+ files:
34
+ - license.txt
35
+ - readme.rdoc
36
+ - ext/rsec/ext.rb
37
+ - ext/rsec/predef.c
38
+ - ext/rsec/extconf.rb
39
+ has_rdoc: true
40
+ homepage: http://rsec.heroku.com
41
+ licenses: []
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - ext
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: 1.9.1
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.5.2
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Extreme Fast Parser Combinator for Ruby, the C extension part
64
+ test_files: []