rsec 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ # coding: utf-8
2
+
3
+ module Rsec #:nodoc:
4
# Shared behaviour for every parser object.
# Including classes are expected to provide `_parse(ctx)`.
module Parser
  # Parses `str` and hands back the result of `_parse` unchanged.
  # `source_name` is forwarded to the ParseContext.
  # NOTE(review): the previous comment said nil is returned when unparsed,
  # but no conversion happens here -- confirm what callers see on failure.
  def parse(str, source_name='source')
    _parse(ParseContext.new(str, source_name))
  end

  # Same as #parse, except that a result flagged by INVALID[] raises the
  # error object produced by `ctx.generate_error(source_name)`.
  def parse!(str, source_name='source')
    context = ParseContext.new(str, source_name)
    result = _parse(context)
    raise context.generate_error(source_name) if INVALID[result]
    result
  end

  attr_accessor :name

  # Short description of the parser: "<Name>" or "<Name child ...>".
  # The name defaults to the trailing word of the class name.
  def inspect
    # TODO move
    @name ||= self.class.to_s[/\w+$/]
    case self
    when Lazy
      "<#{name}>"
    when Binary
      "<#{name} #{left.inspect} #{right.inspect}>"
    when Seq, Seq_, Branch
      # collect through each: map may be redefined on these classes
      children = []
      each { |child| children << child.inspect }
      "<#{name} #{children.join ' '}>"
    when Unary
      "<#{name} #{some.inspect}>"
    else
      "<#{name}>"
    end
  end
end
45
+
46
# Base for parsers built from two parameters, exposed as `left` and `right`.
Binary = Struct.new(:left, :right) do
  include Parser
end
51
+
52
# Base for parsers built from a single parameter, exposed as `some`.
Unary = Struct.new(:some) do
  include Parser
end
57
+ end
58
+
59
+ require "rsec/parsers/misc"
60
+ require "rsec/parsers/seq"
61
+ require "rsec/parsers/prim"
62
+ require "rsec/parsers/join"
63
+ require "rsec/parsers/repeat"
64
+
@@ -0,0 +1,86 @@
1
+ module Rsec
2
+
3
# Join base: parses `left (right left)*` and returns every result in order,
# i.e. [left, right, left, right, left, ...].
class Join < Binary
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    results = [first]
    loop do
      checkpoint = ctx.pos
      inter = right._parse(ctx)
      failed = INVALID[inter]
      unless failed
        token = left._parse(ctx)
        failed = INVALID[token]
      end
      if failed
        # an incomplete "right left" pair is discarded: rewind to before it
        ctx.pos = checkpoint
        break
      end

      # stop if no advance, prevent infinite loop
      break if checkpoint == ctx.pos
      results << inter
      results << token
    end
    results
  end
end
30
+
31
# Parses `left (right left)*` but keeps only the results of `left`
# (the tokens); results of `right` are dropped.
class JoinEven < Binary
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    tokens = [first]
    loop do
      checkpoint = ctx.pos
      inter = right._parse(ctx)
      failed = INVALID[inter]
      unless failed
        token = left._parse(ctx)
        failed = INVALID[token]
      end
      if failed
        # an incomplete "right left" pair is discarded: rewind to before it
        ctx.pos = checkpoint
        break
      end

      # stop if no advance, prevent infinite loop
      break if checkpoint == ctx.pos
      tokens << token
    end
    tokens
  end
end
57
+
58
# Parses `left (right left)*` but keeps only the results of `right`
# (the inters).
# NOTE if only 1 token matches, return empty array
class JoinOdd < Binary
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    inters = []
    loop do
      checkpoint = ctx.pos
      inter = right._parse(ctx)
      failed = INVALID[inter]
      unless failed
        token = left._parse(ctx)
        failed = INVALID[token]
      end
      if failed
        # an incomplete "right left" pair is discarded: rewind to before it
        ctx.pos = checkpoint
        break
      end

      # stop if no advance, prevent infinite loop
      break if checkpoint == ctx.pos
      inters << inter
    end
    inters
  end
end
85
+
86
+ end
@@ -0,0 +1,201 @@
1
+ module Rsec #:nodoc
2
+
3
# Transforms a parse result: runs `left`, then passes a valid result to
# `right` through its `[]` method and returns what that yields.
class Map < Binary
  def _parse(ctx)
    parsed = left._parse(ctx)
    INVALID[parsed] ? INVALID : right[parsed]
  end
end
11
+
12
# Sets the expected tokens reported for a parsing error in ctx.
# Each Fail parser gets its own bit; when `left` fails, that bit is passed
# to `ctx.on_fail`. The tokens belonging to a bit mask can be looked up
# again with Fail.get_tokens.
class Fail < Binary
  # Builds a Fail parser around `left` and records `tokens` under a fresh bit.
  # The parser's `right` member holds the bit mask (1 << index).
  # Raises RuntimeError once the internal counter has exceeded 1000.
  def self.[](left, tokens)
    # TODO mutex
    @mask_bit ||= 0
    @token_table ||= []
    if @mask_bit > 1000
      raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
    end
    parser = super(left, (1 << @mask_bit))
    @token_table[@mask_bit] = tokens
    @mask_bit += 1
    parser
  end

  # Forgets every registered token list and restarts bit numbering at 0.
  def self.reset
    @mask_bit = 0
    @token_table = []
  end

  # Returns the de-duplicated tokens of every registered entry whose bit is
  # set in `mask`. Returns an empty array when nothing has been registered
  # yet (the table is only created by Fail.[] or Fail.reset).
  def self.get_tokens(mask)
    res = []
    (@token_table || []).each_with_index do |tokens, idx|
      next unless tokens
      res.concat(tokens) if (mask & (1 << idx)) > 0
    end
    res.uniq
  end

  # Runs `left`; on an invalid result reports this parser's bit (`right`)
  # to the context. The result of `left` is returned either way.
  def _parse(ctx)
    res = left._parse(ctx)
    ctx.on_fail(right) if INVALID[res]
    res
  end
end
51
+
52
# Look ahead: succeeds with the result of `left` only when `right` also
# matches directly after it. Input matched by `right` is not consumed.
class LookAhead < Binary
  def _parse(ctx)
    res = left._parse(ctx)
    # left already failed: do not run the look-ahead parser at all
    return INVALID if INVALID[res]
    pos = ctx.pos
    return INVALID if INVALID[right._parse(ctx)]
    # rewind so that only the input matched by left stays consumed
    ctx.pos = pos
    res
  end
end
62
+
63
# Negative look ahead: succeeds with the result of `left` only when `right`
# does NOT match directly after it.
class NegativeLookAhead < Binary
  def _parse(ctx)
    res = left._parse(ctx)
    # left already failed: do not run the look-ahead parser at all
    return INVALID if INVALID[res]
    pos = ctx.pos
    # the forbidden pattern matched: report failure explicitly as INVALID
    # (an `if` without `else` would evaluate to nil here)
    return INVALID unless INVALID[right._parse(ctx)]
    # rewind whatever the failed look-ahead attempt may have consumed
    ctx.pos = pos
    res
  end
end
74
+
75
# Branch combinator: tries each member of `some` in order from the same
# starting position. Returns the first valid result, or INVALID when
# every member fails.
class Branch < Unary
  def _parse(ctx)
    start = ctx.pos
    some.each do |alternative|
      outcome = alternative._parse(ctx)
      if INVALID[outcome]
        # rewind before trying the next alternative
        ctx.pos = start
      else
        return outcome
      end
    end
    INVALID
  end
end
88
+
89
# Matches a pattern: returns what `ctx.scan` yields for the pattern in
# `some`, or INVALID when that is nil/false.
class Pattern < Unary
  def _parse(ctx)
    ctx.scan(some) || INVALID
  end
end
95
+
96
# Scans until the pattern (for optimizing): returns what `ctx.scan_until`
# yields for the pattern in `some`, or INVALID when that is nil/false.
class UntilPattern < Unary
  def _parse(ctx)
    ctx.scan_until(some) || INVALID
  end
end
103
+
104
# For optimization, not exposed to users.
# Returns what `ctx.skip` yields for the pattern in `some`,
# or INVALID when that is nil/false.
class SkipPattern < Unary
  def _parse(ctx)
    ctx.skip(some) || INVALID
  end
end
110
+
111
# For optimization, not exposed to users.
# Returns what `ctx.skip_until` yields for the pattern in `some`,
# or INVALID when that is nil/false.
class SkipUntilPattern < Unary
  def _parse(ctx)
    ctx.skip_until(some) || INVALID
  end
end
117
+
118
# Requires end of input after the wrapped parser has run: returns the
# result of `some` when `ctx.eos?` holds afterwards, INVALID otherwise.
class Eof < Unary
  def _parse(ctx)
    result = some._parse(ctx)
    return INVALID unless ctx.eos?
    result
  end
end
125
+
126
# Matches a single character that is contained in `some`
# (membership is tested with `some.index`).
# Returns the character, or INVALID at end of input or on a mismatch.
class OneOf < Unary
  def _parse(ctx)
    return INVALID if ctx.eos?
    start = ctx.pos
    chr = ctx.getch
    return chr if some.index(chr)
    # Rewind to the saved position rather than `pos - 1`: if pos counts
    # bytes (as StringScanner#pos does), a multibyte character is wider
    # than one position unit.
    # NOTE(review): assumes ctx follows StringScanner semantics -- confirm.
    ctx.pos = start
    INVALID
  end
end
139
+
140
# Like OneOf, but skips whitespace (/\s*/) before and after the character.
# Returns the character, or INVALID at end of input or on a mismatch.
# On failure the position is restored to where parsing started, so skipped
# whitespace and the rejected character are not left consumed.
class OneOf_ < Unary
  def _parse(ctx)
    start = ctx.pos
    ctx.skip(/\s*/)
    chr = ctx.eos? ? nil : ctx.getch
    unless chr && some.index(chr)
      ctx.pos = start
      return INVALID
    end
    ctx.skip(/\s*/)
    chr
  end
end
153
+
154
# Sometimes a variable is not defined yet; Lazy captures it later.
# `some` is a callable invoked (via `[]`) on the first parse to obtain the
# real parser.
# NOTE the value is captured the first time it is called
class Lazy < Unary
  def _parse(ctx)
    unless @some
      @some =
        begin
          some[]
        rescue NameError => ex
          # the name was not resolvable when the callable ran: look the
          # missing name up through the callable's own binding instead
          some.binding.eval(ex.name.to_s)
        end
    end
    @some._parse(ctx)
  end
end
168
+
169
# Wraps a parser and stores its results in `ctx.cache`, keyed by the
# starting position combined with a per-instance salt.
# May improve performance.
class Cached
  include Parser

  # Cached[parser] is shorthand for Cached.new(parser).
  def self.[](parser)
    new(parser)
  end

  def initialize(parser)
    @parser = parser
    # shift the object id left by 32 bits; it is OR-ed with positions below
    @salt = object_id << 32
  end

  # Replays a stored [result, end position] pair when one exists for the
  # current position; otherwise runs the wrapped parser and stores the pair.
  def _parse(ctx)
    key = ctx.pos | @salt
    store = ctx.cache
    # the result may be nil, so test for the key instead of using ||=
    if store.has_key?(key)
      result, end_pos = store[key]
      ctx.pos = end_pos
      return result
    end

    result = @parser._parse(ctx)
    store[key] = [result, ctx.pos]
    result
  end
end
199
+
200
+ end
201
+
@@ -0,0 +1,102 @@
1
+ module Rsec
2
+
3
# Helpers shared by the primitive number parsers.
module Prim
  # Maps a sign strategy code to a regexp source fragment:
  #   0 - no sign, 1 - optional minus, 2 - optional plus, 3 - optional either.
  # Any other value yields nil.
  def sign_strategy_to_pattern(sign_strategy)
    case sign_strategy
    when 0 then ''
    when 1 then '\-?'
    when 2 then '\+?'
    when 3 then '[\+\-]?'
    end
  end
end
14
+
15
# Double precision float parser.
# `left` holds the regexp used for scanning; `right` is not assigned here.
class PDouble < Binary
  include Prim

  # Builds the scanning regexp (case-insensitive) for the given sign
  # strategy, in hexadecimal ("0x...") or decimal form (optional fraction
  # and exponent).
  def float_pattern(sign_strategy, is_hex)
    sign = sign_strategy_to_pattern(sign_strategy)
    return /#{sign}0x[\da-f]+(\.[\da-f]+)?/i if is_hex
    /#{sign}\d+(\.\d+)?(e[\+\-]?\d+)?/i
  end

  def initialize(sign_strategy, is_hex)
    self.left = float_pattern(sign_strategy, is_hex)
  end

  # Scans the pattern and converts the text with Float().
  # Returns INVALID when nothing matches or the value is not finite.
  def _parse(ctx)
    text = ctx.scan(left)
    return INVALID unless text
    value = Float(text)
    value.finite? ? value : INVALID
  end
end
40
+
41
# Common code of the primitive integer parsers.
# `left` holds the scanning regexp and `right` the accepted value range.
# `_parse` reads @base, which this class itself never assigns.
class PInt < Binary
  include Prim

  # Builds the scanning regexp (case-insensitive) for integers in `base`:
  # digits 0..base-1 up to base 10, digits 0-9 plus letters starting at
  # 'a' above base 10.
  def int_pattern(sign_strategy, base)
    sign = sign_strategy_to_pattern(sign_strategy)
    top_digit, letters =
      if base > 10
        [9, "a-#{('a'.ord + base - 11).chr}"]
      else
        [base - 1, '']
      end
    /#{sign}[0-#{top_digit}#{letters}]+/i
  end

  # Scans the pattern and converts the text with String#to_i in @base.
  # Returns INVALID when nothing matches or the value is outside `right`.
  def _parse(ctx)
    text = ctx.scan(left)
    return INVALID unless text
    value = text.to_i(@base)
    right.include?(value) ? value : INVALID
  end
end
65
+
66
# 32-bit int parser: accepts values in -2**31 .. 2**31 - 1.
class PInt32 < PInt
  def initialize(sign_strategy, base)
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = Range.new(-(2**31), 2**31 - 1)
  end
end
74
+
75
# Unsigned 32-bit int parser: accepts values in 0 .. 2**32 - 1.
class PUnsignedInt32 < PInt
  def initialize(sign_strategy, base)
    @base = base
    self.left = int_pattern(sign_strategy, base)
    # exclusive upper bound, i.e. 0...2**32
    self.right = Range.new(0, 2**32, true)
  end
end
83
+
84
# 64-bit int parser: accepts values in -2**63 .. 2**63 - 1.
class PInt64 < PInt
  def initialize(sign_strategy, base)
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = Range.new(-(2**63), 2**63 - 1)
  end
end
92
+
93
# Unsigned 64-bit int parser: accepts values in 0 .. 2**64 - 1.
class PUnsignedInt64 < PInt
  def initialize(sign_strategy, base)
    @base = base
    self.left = int_pattern(sign_strategy, base)
    # exclusive upper bound, i.e. 0...2**64
    self.right = Range.new(0, 2**64, true)
  end
end
101
+
102
+ end