rsec 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,64 @@
1
+ # coding: utf-8
2
+
3
+ module Rsec #:nodoc:
4
# Base mixin shared by the parser classes.
#
# The entry points below build a ParseContext and delegate to +_parse(ctx)+,
# which the including class is expected to provide.
module Parser
  # Label used by #inspect. When unset, #inspect fills it in from the
  # trailing word of the class name.
  attr_accessor :name

  # Parses +str+ and returns the result of +_parse+ unchanged.
  # No failure translation is done here, so an unparsed input surfaces as
  # whatever failure value +_parse+ returned.
  #
  # str::         the text to parse
  # source_name:: label handed to the ParseContext (defaults to 'source')
  def parse str, source_name='source'
    _parse ParseContext.new(str, source_name)
  end

  # Almost the same as #parse, but when the result is flagged by
  # <tt>INVALID[]</tt> it raises the error built by
  # <tt>ctx.generate_error</tt> instead of returning.
  def parse! str, source_name='source'
    context = ParseContext.new str, source_name
    result = _parse context
    raise context.generate_error(source_name) if INVALID[result]
    result
  end

  # Short bracketed description of the parser; the shape depends on the
  # kind of parser (+Lazy+, +Binary+, sequence-like, +Unary+ or other).
  def inspect
    # TODO move
    @name ||= self.class.to_s[/\w+$/]
    detail =
      case self
      when Lazy
        nil
      when Binary
        "#{left.inspect} #{right.inspect}"
      when Seq, Seq_, Branch
        # don't use redefined map!
        parts = []
        each { |member| parts << member.inspect }
        parts.join ' '
      when Unary
        some.inspect
      end
    # an empty detail string still takes the spaced form, e.g. "<Seq >"
    detail ? "<#{name} #{detail}>" : "<#{name}>"
  end
end
45
+
46
# Base for parsers that take two parameters, exposed as +left+ and +right+.
# The Parser mixin is included so subclasses only have to define +_parse+.
Binary = Struct.new(:left, :right) do
  include Parser
end
51
+
52
# Base for parsers that take a single parameter, exposed as +some+.
# The Parser mixin is included so subclasses only have to define +_parse+.
Unary = Struct.new(:some) do
  include Parser
end
57
+ end
58
+
59
+ require "rsec/parsers/misc"
60
+ require "rsec/parsers/seq"
61
+ require "rsec/parsers/prim"
62
+ require "rsec/parsers/join"
63
+ require "rsec/parsers/repeat"
64
+
@@ -0,0 +1,86 @@
1
+ module Rsec
2
+
3
# Join base: parses +left+ once, then repeats (+right+ followed by +left+).
# Returns an array interleaving every result: [left, right, left, ...],
# or INVALID when the very first +left+ fails.
class Join < Binary
  def _parse ctx
    first = left._parse ctx
    return INVALID if INVALID[first]

    acc = [first]
    loop do
      mark = ctx.pos
      sep = right._parse ctx
      failed = INVALID[sep]
      unless failed
        item = left._parse ctx
        failed = INVALID[item]
      end
      if failed
        # an incomplete (right, left) pair is discarded: rewind to before it
        ctx.pos = mark
        break
      end
      # stop if no advance, prevent infinite loop
      break if ctx.pos == mark
      acc << sep << item
    end
    acc
  end
end
30
+
31
# Like a join of +left+ separated by +right+, but keeps only the +left+
# results (the tokens); the +right+ results are parsed and dropped.
# Returns INVALID when the very first +left+ fails.
class JoinEven < Binary
  def _parse ctx
    first = left._parse ctx
    return INVALID if INVALID[first]

    acc = [first]
    loop do
      mark = ctx.pos
      sep = right._parse ctx
      failed = INVALID[sep]
      unless failed
        item = left._parse ctx
        failed = INVALID[item]
      end
      if failed
        # an incomplete (right, left) pair is discarded: rewind to before it
        ctx.pos = mark
        break
      end
      # stop if no advance, prevent infinite loop
      break if ctx.pos == mark
      acc << item
    end
    acc
  end
end
57
+
58
# Like a join of +left+ separated by +right+, but keeps only the +right+
# results (the inters); the +left+ results are parsed and dropped.
# NOTE if only 1 token matches, the result is an empty array.
# Returns INVALID when the very first +left+ fails.
class JoinOdd < Binary
  def _parse ctx
    first = left._parse ctx
    return INVALID if INVALID[first]

    acc = []
    loop do
      mark = ctx.pos
      sep = right._parse ctx
      failed = INVALID[sep]
      unless failed
        item = left._parse ctx
        failed = INVALID[item]
      end
      if failed
        # an incomplete (right, left) pair is discarded: rewind to before it
        ctx.pos = mark
        break
      end
      # stop if no advance, prevent infinite loop
      break if ctx.pos == mark
      acc << sep
    end
    acc
  end
end
85
+
86
+ end
@@ -0,0 +1,201 @@
1
+ module Rsec #:nodoc:
2
+
3
# Transforms a parse result: runs +left+, then feeds its result to +right+
# through <tt>right[result]</tt>. A failed +left+ yields INVALID and +right+
# is not invoked.
class Map < Binary
  def _parse ctx
    parsed = left._parse ctx
    return INVALID if INVALID[parsed]

    transform = right
    transform[parsed]
  end
end
11
+
12
# Sets the expected tokens reported for a parsing error.<br/>
# If +left+ fails, the failure is registered on the context.
#
# Every parser built through Fail.[] gets its own bit (<tt>1 << n</tt>),
# stored as +right+; the tokens for bit +n+ live in a class-level table.
class Fail < Binary
  # Builds a Fail parser around +left+ and records +tokens+ for it.
  #
  # Raises RuntimeError once the bit counter has gone past 1000;
  # call Fail.reset to start over.
  def self.[] left, tokens
    # TODO mutex
    @mask_bit ||= 0
    @token_table ||= []
    if @mask_bit > 1000
      raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
    end
    parser = super(left, (1 << @mask_bit))
    @token_table[@mask_bit] = tokens
    @mask_bit += 1
    parser
  end

  # Forgets every registered token list and restarts bit numbering at 0.
  def self.reset
    @mask_bit = 0
    @token_table = []
  end

  # Returns the distinct tokens of every registered parser whose bit is set
  # in +mask+. Returns an empty array when nothing has been registered yet
  # (the table is only created by Fail.[] or Fail.reset).
  def self.get_tokens mask
    table = @token_table || []
    selected = []
    table.each_with_index do |tokens, bit|
      next unless tokens
      selected.concat tokens if (mask & (1 << bit)) > 0
    end
    selected.uniq
  end

  # Runs +left+; on failure reports this parser's bit (+right+) through
  # <tt>ctx.on_fail</tt>. The result of +left+ is returned either way.
  def _parse ctx
    res = left._parse ctx
    ctx.on_fail right if INVALID[res]
    res
  end
end
51
+
52
# Look ahead: runs +left+, then requires +right+ to match right after it.
# On success the position is rewound to just after +left+, so what +right+
# matched is not consumed. Returns INVALID when +right+ fails.
class LookAhead < Binary
  def _parse ctx
    result = left._parse ctx
    mark = ctx.pos
    peeked = right._parse ctx
    return INVALID if INVALID[peeked]

    ctx.pos = mark
    result
  end
end
62
+
63
# Negative look ahead: runs +left+, then requires that +right+ does NOT
# match right after it.
#
# Returns the result of +left+ (with the position rewound to just after
# +left+) when +right+ fails, and INVALID when +right+ matches.
class NegativeLookAhead < Binary
  def _parse ctx
    res = left._parse ctx
    pos = ctx.pos
    # a matching +right+ is a failure of this parser; signal it with INVALID
    # explicitly rather than falling off the end and returning nil
    return INVALID unless INVALID[right._parse(ctx)]

    ctx.pos = pos
    res
  end
end
74
+
75
# Branch combinator.<br/>
# Tries each member of +some+ in order from the same starting position and
# returns the first result that is not INVALID, or INVALID if none matched.
class Branch < Unary
  def _parse ctx
    start = ctx.pos
    some.each do |alternative|
      outcome = alternative._parse ctx
      if INVALID[outcome]
        # rewind so the next alternative starts from the same place
        ctx.pos = start
      else
        return outcome
      end
    end
    INVALID
  end
end
88
+
89
# Matches a pattern: returns what <tt>ctx.scan</tt> yields for +some+,
# or INVALID when that is nil/false.
class Pattern < Unary
  def _parse ctx
    ctx.scan(some) || INVALID
  end
end
95
+
96
# Scans until the pattern (for optimizing): returns what
# <tt>ctx.scan_until</tt> yields for +some+, or INVALID when that is nil/false.
class UntilPattern < Unary
  def _parse ctx
    ctx.scan_until(some) || INVALID
  end
end
103
+
104
# For optimization, not exposed to users.
# Returns what <tt>ctx.skip</tt> yields for +some+, or INVALID when that is
# nil/false.
class SkipPattern < Unary
  def _parse ctx
    ctx.skip(some) || INVALID
  end
end
110
+
111
# For optimization, not exposed to users.
# Returns what <tt>ctx.skip_until</tt> yields for +some+, or INVALID when
# that is nil/false.
class SkipUntilPattern < Unary
  def _parse ctx
    ctx.skip_until(some) || INVALID
  end
end
117
+
118
# The input should be at end-of-file after parsing: runs +some+ and returns
# its result only when <tt>ctx.eos?</tt> is true afterwards, INVALID otherwise.
class Eof < Unary
  def _parse ctx
    result = some._parse ctx
    return INVALID unless ctx.eos?

    result
  end
end
125
+
126
# Matches one character out of the string +some+.
#
# Returns the character read by <tt>ctx.getch</tt> when +some+ contains it.
# Otherwise the position is put back where it was and INVALID is returned.
# INVALID is also returned at end of input.
class OneOf < Unary
  def _parse ctx
    return INVALID if ctx.eos?

    # Remember the exact starting position instead of stepping back by 1:
    # a character may span more than one position unit.
    # NOTE(review): ctx looks like a StringScanner, whose pos is a byte
    # offset while getch reads a whole character -- confirm.
    start = ctx.pos
    chr = ctx.getch
    return chr if some.index(chr)

    ctx.pos = start
    INVALID
  end
end
139
+
140
# Matches one character out of the string +some+, skipping whitespace
# (<tt>/\s*/</tt>) before the character and, on a match, after it.
# Returns the character, or INVALID at end of input or on a mismatch.
# On a mismatch the position is left where the read ended (no rewind).
class OneOf_ < Unary
  def _parse ctx
    ctx.skip(/\s*/)
    return INVALID if ctx.eos?

    chr = ctx.getch
    return INVALID unless some.index(chr)

    ctx.skip(/\s*/)
    chr
  end
end
153
+
154
# Sometimes a variable is not defined yet;<br/>
# Lazy is used to capture it later.
# NOTE the value is captured the first time the parser is used and kept
# in <tt>@some</tt> (a falsy value is not kept and is resolved again).
class Lazy < Unary
  def _parse ctx
    @some ||=
      begin
        thunk = some
        begin
          thunk[]
        rescue NameError => err
          # the name was not resolvable when called: look it up through the
          # thunk's own binding instead
          thunk.binding.eval err.name.to_s
        end
      end
    @some._parse ctx
  end
end
168
+
169
# Wraps a parser and caches its results in <tt>ctx.cache</tt>;
# may improve performance.
#
# Cache keys combine the current position with a per-instance salt
# (<tt>object_id << 32</tt>); each entry stores the result together with
# the position reached after parsing.
class Cached
  include Parser

  # Shorthand constructor: <tt>Cached[parser]</tt>.
  def self.[] parser
    new parser
  end

  def initialize parser
    @parser = parser
    @salt = object_id << 32
  end

  def _parse ctx
    key = ctx.pos | @salt
    memo = ctx.cache
    # the cached result may be nil, so test for the key, not the value
    unless memo.has_key? key
      result = @parser._parse ctx
      memo[key] = [result, ctx.pos]
      return result
    end

    # hit: replay the stored result and jump to the stored position
    result, resume_at = memo[key]
    ctx.pos = resume_at
    result
  end
end
199
+
200
+ end
201
+
@@ -0,0 +1,102 @@
1
+ module Rsec
2
+
3
# Primitive base: helpers shared by the primitive number parsers.
module Prim
  # Maps a sign strategy code to the regexp source fragment for the sign:
  #
  # 3:: optional '+' or '-'
  # 2:: optional '+'
  # 1:: optional '-'
  # 0:: no sign allowed
  #
  # Raises ArgumentError for any other value, instead of returning nil
  # (which would silently interpolate as "no sign" into a pattern).
  def sign_strategy_to_pattern sign_strategy
    case sign_strategy
    when 3 then '[\+\-]?'
    when 2 then '\+?'
    when 1 then '\-?'
    when 0 then ''
    else
      raise ArgumentError, "unknown sign strategy #{sign_strategy.inspect} (expected 0, 1, 2 or 3)"
    end
  end
end
14
+
15
# Double precision float parser.
# +left+ holds the regexp built at construction; +right+ is left unset.
class PDouble < Binary
  include Prim

  # Builds the regexp for a float literal (case-insensitive).
  #
  # sign_strategy:: sign code understood by sign_strategy_to_pattern
  # is_hex::        truthy for the "0x..." hexadecimal form, otherwise the
  #                 decimal form with optional fraction and exponent
  def float_pattern sign_strategy, is_hex
    sign = sign_strategy_to_pattern sign_strategy
    return /#{sign}0x[\da-f]+(\.[\da-f]+)?/i if is_hex

    /#{sign}\d+(\.\d+)?(e[\+\-]?\d+)?/i
  end

  def initialize sign_strategy, is_hex
    self.left = float_pattern sign_strategy, is_hex
  end

  # Scans the pattern and converts the text with Float().
  # Returns the number when it is finite, INVALID otherwise
  # (no match, or a non-finite value).
  def _parse ctx
    text = ctx.scan left
    return INVALID unless text

    value = Float(text)
    value.finite? ? value : INVALID
  end
end
40
+
41
# Primitive int parser commons.
# +left+ holds the regexp, +right+ the accepted range, and <tt>@base</tt>
# the radix; this class does not set them itself.
class PInt < Binary
  include Prim

  # Builds the case-insensitive regexp for an integer literal in +base+.
  # For bases above 10 the digits are 0-9 plus letters starting at 'a';
  # otherwise only the digits 0..(base - 1) are accepted.
  def int_pattern sign_strategy, base
    sign = sign_strategy_to_pattern sign_strategy
    top_digit, letters =
      if base > 10
        [9, "a-#{('a'.ord + base - 11).chr}"]
      else
        [base - 1, '']
      end
    /#{sign}[0-#{top_digit}#{letters}]+/i
  end

  # Scans the pattern and converts the text with <tt>to_i(@base)</tt>.
  # Returns the number when +right+ includes it, INVALID otherwise
  # (no match, or value out of range).
  def _parse ctx
    text = ctx.scan left
    return INVALID unless text

    number = text.to_i @base
    right.include?(number) ? number : INVALID
  end
end
65
+
66
# 32-bit int parser: accepts values from -2**31 through 2**31 - 1.
class PInt32 < PInt
  def initialize sign_strategy, base
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = (-(2**31))..(2**31 - 1)
  end
end
74
+
75
# Unsigned 32-bit int parser: accepts values from 0 up to, but not
# including, 2**32.
class PUnsignedInt32 < PInt
  def initialize sign_strategy, base
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = 0...(2**32)
  end
end
83
+
84
# 64-bit int parser: accepts values from -2**63 through 2**63 - 1.
class PInt64 < PInt
  def initialize sign_strategy, base
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = (-(2**63))..(2**63 - 1)
  end
end
92
+
93
# Unsigned 64-bit int parser: accepts values from 0 up to, but not
# including, 2**64.
class PUnsignedInt64 < PInt
  def initialize sign_strategy, base
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = 0...(2**64)
  end
end
101
+
102
+ end