citrus 1.8.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +53 -46
- data/benchmark/after.dat +192 -0
- data/benchmark/before.dat +192 -0
- data/benchmark/master.dat +192 -0
- data/doc/background.markdown +9 -10
- data/doc/example.markdown +24 -15
- data/doc/syntax.markdown +20 -21
- data/lib/citrus.rb +208 -178
- data/lib/citrus/debug.rb +34 -4
- data/test/file_test.rb +12 -12
- data/test/helper.rb +27 -5
- data/test/match_test.rb +18 -34
- data/test/parse_error_test.rb +56 -0
- data/test/terminal_test.rb +56 -0
- metadata +10 -7
- data/test/expression_test.rb +0 -29
- data/test/fixed_width_test.rb +0 -37
@@ -0,0 +1,192 @@
|
|
1
|
+
12 0
|
2
|
+
30 0
|
3
|
+
35 0
|
4
|
+
1749 19
|
5
|
+
1962 49
|
6
|
+
2383 10
|
7
|
+
3728 49
|
8
|
+
3919 59
|
9
|
+
3952 60
|
10
|
+
3995 49
|
11
|
+
4063 50
|
12
|
+
4325 70
|
13
|
+
4527 60
|
14
|
+
4570 60
|
15
|
+
4607 70
|
16
|
+
4654 59
|
17
|
+
4679 70
|
18
|
+
4774 59
|
19
|
+
4968 70
|
20
|
+
5059 39
|
21
|
+
5383 39
|
22
|
+
5915 70
|
23
|
+
6109 70
|
24
|
+
6122 80
|
25
|
+
6218 90
|
26
|
+
6332 70
|
27
|
+
6681 59
|
28
|
+
7440 90
|
29
|
+
7530 80
|
30
|
+
7605 79
|
31
|
+
8155 89
|
32
|
+
8402 100
|
33
|
+
8420 100
|
34
|
+
8617 110
|
35
|
+
8635 109
|
36
|
+
8841 120
|
37
|
+
8843 109
|
38
|
+
8852 100
|
39
|
+
9151 99
|
40
|
+
9271 89
|
41
|
+
9521 99
|
42
|
+
9525 89
|
43
|
+
9566 100
|
44
|
+
9584 109
|
45
|
+
9642 110
|
46
|
+
10138 130
|
47
|
+
10181 109
|
48
|
+
10225 109
|
49
|
+
10338 110
|
50
|
+
10449 99
|
51
|
+
10629 100
|
52
|
+
10763 129
|
53
|
+
10817 130
|
54
|
+
11059 120
|
55
|
+
11062 120
|
56
|
+
11215 119
|
57
|
+
11698 129
|
58
|
+
11891 120
|
59
|
+
11945 139
|
60
|
+
11956 129
|
61
|
+
12018 150
|
62
|
+
12053 109
|
63
|
+
12178 140
|
64
|
+
12283 119
|
65
|
+
12326 99
|
66
|
+
12430 119
|
67
|
+
12438 130
|
68
|
+
12572 130
|
69
|
+
12638 129
|
70
|
+
12687 130
|
71
|
+
12703 129
|
72
|
+
12896 129
|
73
|
+
12922 120
|
74
|
+
12996 129
|
75
|
+
13137 170
|
76
|
+
13211 140
|
77
|
+
13462 140
|
78
|
+
13477 129
|
79
|
+
13576 140
|
80
|
+
13577 140
|
81
|
+
13584 129
|
82
|
+
13605 130
|
83
|
+
13631 160
|
84
|
+
14216 150
|
85
|
+
14237 139
|
86
|
+
14260 130
|
87
|
+
14367 129
|
88
|
+
14371 160
|
89
|
+
14741 170
|
90
|
+
14893 139
|
91
|
+
14910 160
|
92
|
+
14917 149
|
93
|
+
14977 149
|
94
|
+
15049 150
|
95
|
+
15191 179
|
96
|
+
15382 150
|
97
|
+
15618 179
|
98
|
+
15623 179
|
99
|
+
15629 150
|
100
|
+
15856 170
|
101
|
+
16496 200
|
102
|
+
16512 160
|
103
|
+
16956 179
|
104
|
+
17074 149
|
105
|
+
17237 189
|
106
|
+
17371 170
|
107
|
+
17568 189
|
108
|
+
17945 199
|
109
|
+
18147 190
|
110
|
+
18343 210
|
111
|
+
18417 199
|
112
|
+
18823 200
|
113
|
+
18970 210
|
114
|
+
19285 220
|
115
|
+
19333 189
|
116
|
+
19500 199
|
117
|
+
19548 219
|
118
|
+
19634 189
|
119
|
+
19673 209
|
120
|
+
19689 209
|
121
|
+
19909 199
|
122
|
+
20054 199
|
123
|
+
20107 229
|
124
|
+
20248 219
|
125
|
+
20580 219
|
126
|
+
20744 230
|
127
|
+
20806 210
|
128
|
+
20954 230
|
129
|
+
21034 209
|
130
|
+
21187 199
|
131
|
+
21303 239
|
132
|
+
21450 209
|
133
|
+
21626 189
|
134
|
+
21931 289
|
135
|
+
21950 230
|
136
|
+
22359 250
|
137
|
+
22626 209
|
138
|
+
22638 279
|
139
|
+
22772 250
|
140
|
+
22885 259
|
141
|
+
22897 240
|
142
|
+
23114 330
|
143
|
+
23242 229
|
144
|
+
23428 279
|
145
|
+
23452 240
|
146
|
+
23495 250
|
147
|
+
23499 260
|
148
|
+
23558 270
|
149
|
+
23744 220
|
150
|
+
23881 240
|
151
|
+
23945 269
|
152
|
+
24361 250
|
153
|
+
24501 240
|
154
|
+
24642 269
|
155
|
+
24672 249
|
156
|
+
24694 219
|
157
|
+
24706 270
|
158
|
+
24931 290
|
159
|
+
25065 300
|
160
|
+
25140 290
|
161
|
+
25402 250
|
162
|
+
25702 289
|
163
|
+
25743 290
|
164
|
+
27139 359
|
165
|
+
27316 299
|
166
|
+
27333 290
|
167
|
+
27414 309
|
168
|
+
27771 309
|
169
|
+
27798 259
|
170
|
+
28583 320
|
171
|
+
28906 309
|
172
|
+
29025 360
|
173
|
+
29209 310
|
174
|
+
29272 280
|
175
|
+
29273 310
|
176
|
+
29359 270
|
177
|
+
29577 330
|
178
|
+
30886 359
|
179
|
+
31170 359
|
180
|
+
31593 330
|
181
|
+
32460 369
|
182
|
+
32486 390
|
183
|
+
32630 399
|
184
|
+
33010 440
|
185
|
+
33137 419
|
186
|
+
33142 389
|
187
|
+
33739 340
|
188
|
+
39880 470
|
189
|
+
39940 460
|
190
|
+
42952 489
|
191
|
+
43227 500
|
192
|
+
52855 640
|
data/doc/background.markdown
CHANGED
@@ -29,9 +29,9 @@ A [Rule](api/classes/Citrus/Rule.html) is an object that specifies some matching
|
|
29
29
|
behavior on a string. There are two types of rules: terminals and non-terminals.
|
30
30
|
Terminals can be either Ruby strings or regular expressions that specify some
|
31
31
|
input to match. For example, a terminal created from the string "end" would
|
32
|
-
match any sequence of the characters "e", "n", and "d", in that order.
|
33
|
-
|
34
|
-
|
32
|
+
match any sequence of the characters "e", "n", and "d", in that order. Terminals
|
33
|
+
created from regular expressions may match any sequence of characters that can
|
34
|
+
be generated from that expression.
|
35
35
|
|
36
36
|
Non-terminals are rules that may contain other rules but do not themselves match
|
37
37
|
directly on the input. For example, a Repeat is a non-terminal that may contain
|
@@ -58,10 +58,10 @@ similar to Ruby's super keyword.
|
|
58
58
|
## Matches
|
59
59
|
|
60
60
|
Matches are created by rule objects when they match on the input. A
|
61
|
-
[Match](api/classes/Citrus/Match.html)
|
62
|
-
[String](http://ruby-doc.org/core/classes/String.html) with some extra
|
63
|
-
information attached such as the name(s) of the rule(s) which
|
64
|
-
|
61
|
+
[Match](api/classes/Citrus/Match.html) is actually a
|
62
|
+
[String](http://ruby-doc.org/core/classes/String.html) object with some extra
|
63
|
+
information attached such as the name(s) of the rule(s) from which it was
|
64
|
+
generated and any submatches it may contain.
|
65
65
|
|
66
66
|
During a parse, matches are arranged in a tree structure where any match may
|
67
67
|
contain any number of other matches. This structure is determined by the way in
|
@@ -70,6 +70,5 @@ match that is created from a non-terminal rule that contains several other
|
|
70
70
|
terminals will likewise contain several matches, one for each terminal.
|
71
71
|
|
72
72
|
Match objects may be extended with semantic information in the form of methods.
|
73
|
-
These methods
|
74
|
-
|
75
|
-
and any submatches.
|
73
|
+
These methods should provide various interpretations for the semantic value of a
|
74
|
+
match.
|
data/doc/example.markdown
CHANGED
@@ -47,9 +47,9 @@ Submatches are created whenever a rule contains another rule. For example, in
|
|
47
47
|
the grammar above the number rule matches a string of digits followed by white
|
48
48
|
space. Thus, a match generated by the number rule will contain two submatches.
|
49
49
|
|
50
|
-
We can
|
51
|
-
matches when they are created
|
52
|
-
|
50
|
+
We can define methods inside a set of curly braces that will be used to extend
|
51
|
+
matches when they are created. This works in similar fashion to using Ruby's
|
52
|
+
blocks. Let's extend the `Addition` grammar using this technique.
|
53
53
|
|
54
54
|
grammar Addition
|
55
55
|
rule additive
|
@@ -83,25 +83,27 @@ on all match objects that result from matches of those particular rules. It's
|
|
83
83
|
easiest to explain what is going on here by starting with the lowest level
|
84
84
|
block, which is defined within the number rule.
|
85
85
|
|
86
|
-
The semantic block associated with the number rule defines one method, value
|
86
|
+
The semantic block associated with the number rule defines one method, `value`.
|
87
87
|
Inside this method, we can see that the value of a number match is determined to
|
88
|
-
be its text value, stripped of white space and converted to an integer.
|
89
|
-
that matches are simply strings, so the `strip`
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
88
|
+
be its text value, stripped of white space and converted to an integer.
|
89
|
+
[Remember](background.html) that matches are simply strings, so the `strip`
|
90
|
+
method in this case is actually
|
91
|
+
[String#strip](http://ruby-doc.org/core/classes/String.html#M000820).
|
92
|
+
|
93
|
+
The `additive` rule also extends its matches with a `value` method. Notice the
|
94
|
+
use of the `term` label within the rule definition. This label allows the match
|
95
|
+
that is created by either the additive or the number rule to be retrieved using
|
96
|
+
the `term` label. The value of an additive is determined to be the values of its
|
96
97
|
`number` and `term` matches added together using Ruby's addition operator.
|
97
98
|
|
98
99
|
Since additive is the first rule defined in the grammar, any match that results
|
99
100
|
from parsing a string with this grammar will have a `value` method that can be
|
100
101
|
used to recursively calculate the collective value of the entire match tree.
|
101
102
|
|
102
|
-
To give it a try, save the code for the Addition grammar in a file called
|
103
|
-
addition.citrus. Next, assuming you have the Citrus
|
104
|
-
|
103
|
+
To give it a try, save the code for the `Addition` grammar in a file called
|
104
|
+
addition.citrus. Next, assuming you have the Citrus
|
105
|
+
[gem](https://rubygems.org/gems/citrus) installed, try the following sequence of
|
106
|
+
commands in a terminal.
|
105
107
|
|
106
108
|
$ irb
|
107
109
|
> require 'citrus'
|
@@ -115,6 +117,13 @@ following sequence of commands in a terminal.
|
|
115
117
|
|
116
118
|
Congratulations! You just ran your first piece of Citrus code.
|
117
119
|
|
120
|
+
One interesting thing to notice about the above sequence of commands is the
|
121
|
+
return value of [Citrus#load](api/classes/Citrus.html#M000003). When you use
|
122
|
+
`Citrus.load` to
|
123
|
+
load a grammar file (and likewise [Citrus#eval](api/classes/Citrus.html#M000004) to evaluate
|
124
|
+
a raw string of grammar code), the return value is an array of all the grammars
|
125
|
+
present in that file.
|
126
|
+
|
118
127
|
Take a look at
|
119
128
|
[examples/calc.citrus](http://github.com/mjijackson/citrus/blob/master/examples/calc.citrus)
|
120
129
|
for an example of a calculator that is able to parse and evaluate more complex
|
data/doc/syntax.markdown
CHANGED
@@ -21,8 +21,7 @@ compatibility with other parsing expression implementations.
|
|
21
21
|
[\x00-\xFF] # match any octet
|
22
22
|
. # match anything, even new lines
|
23
23
|
|
24
|
-
See [
|
25
|
-
[Expression](api/classes/Citrus/Expression.html) for more information.
|
24
|
+
See [Terminal](api/classes/Citrus/Terminal.html) for more information.
|
26
25
|
|
27
26
|
## Repetition
|
28
27
|
|
@@ -108,22 +107,22 @@ See [Label](api/classes/Citrus/Label.html) for more information.
|
|
108
107
|
The following table contains a list of all Citrus operators and their
|
109
108
|
precedence. A higher precedence indicates tighter binding.
|
110
109
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
128
|
-
|
129
|
-
|
110
|
+
Operator | Name | Precedence
|
111
|
+
------------------------- | ------------------------- | ----------
|
112
|
+
`''` | String (single quoted) | 6
|
113
|
+
`""` | String (double quoted) | 6
|
114
|
+
`[]` | Character class | 6
|
115
|
+
`.` | Dot (any character) | 6
|
116
|
+
`//` | Regular expression | 6
|
117
|
+
`()` | Grouping | 6
|
118
|
+
`*` | Repetition (arbitrary) | 5
|
119
|
+
`+` | Repetition (one or more) | 5
|
120
|
+
`?` | Repetition (zero or one) | 5
|
121
|
+
`&` | And predicate | 4
|
122
|
+
`!` | Not predicate | 4
|
123
|
+
`~` | But predicate | 4
|
124
|
+
`:` | Label | 4
|
125
|
+
`<>` | Extension (module name) | 3
|
126
|
+
`{}` | Extension (literal) | 3
|
127
|
+
`e1 e2` | Sequence | 2
|
128
|
+
<code>e1 | e2</code> | Ordered choice | 1
|
data/lib/citrus.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
1
3
|
# Citrus is a compact and powerful parsing library for Ruby that combines the
|
2
4
|
# elegance and expressiveness of the language with the simplicity and power of
|
3
5
|
# parsing expressions.
|
@@ -6,14 +8,14 @@
|
|
6
8
|
module Citrus
|
7
9
|
autoload :File, 'citrus/file'
|
8
10
|
|
9
|
-
VERSION = [
|
11
|
+
VERSION = [2, 0, 0]
|
10
12
|
|
11
13
|
# Returns the current version of Citrus as a string.
|
12
14
|
def self.version
|
13
15
|
VERSION.join('.')
|
14
16
|
end
|
15
17
|
|
16
|
-
# A pattern to match any character, including
|
18
|
+
# A pattern to match any character, including \\n.
|
17
19
|
DOT = /./m
|
18
20
|
|
19
21
|
Infinity = 1.0 / 0
|
@@ -39,25 +41,77 @@ module Citrus
|
|
39
41
|
class ParseError < Exception
|
40
42
|
def initialize(input)
|
41
43
|
@input = input
|
42
|
-
|
43
|
-
|
44
|
-
msg = "Failed to parse input at offset %d" % max_offset
|
45
|
-
msg += ", just after %s" % c[s, c.length].inspect + "\n"
|
44
|
+
msg = "Failed to parse input at offset %d" % offset
|
45
|
+
msg << detail
|
46
46
|
super(msg)
|
47
47
|
end
|
48
48
|
|
49
49
|
# The Input object that was used for the parse.
|
50
50
|
attr_reader :input
|
51
51
|
|
52
|
-
# Returns the
|
53
|
-
|
52
|
+
# Returns the 0-based offset at which the error occurred in the input, i.e.
|
53
|
+
# the maximum offset in the input that was successfully parsed before the
|
54
|
+
# error occurred.
|
55
|
+
def offset
|
54
56
|
input.max_offset
|
55
57
|
end
|
56
58
|
|
57
|
-
# Returns the
|
58
|
-
|
59
|
-
|
60
|
-
|
59
|
+
# Returns the text of the line on which the error occurred.
|
60
|
+
def line
|
61
|
+
lines[line_index]
|
62
|
+
end
|
63
|
+
|
64
|
+
# Returns the 1-based number of the line in the input where the error
|
65
|
+
# occurred.
|
66
|
+
def line_number
|
67
|
+
line_index + 1
|
68
|
+
end
|
69
|
+
|
70
|
+
alias lineno line_number
|
71
|
+
|
72
|
+
# Returns the 0-based offset at which the error occurred on the line on
|
73
|
+
# which it occurred.
|
74
|
+
def line_offset
|
75
|
+
pos = 0
|
76
|
+
each_line do |line|
|
77
|
+
len = line.length
|
78
|
+
return (offset - pos) if pos + len >= offset
|
79
|
+
pos += len
|
80
|
+
end
|
81
|
+
0
|
82
|
+
end
|
83
|
+
|
84
|
+
# Returns a string that, when printed, gives a visual representation of
|
85
|
+
# exactly where the error occurred on its line in the input.
|
86
|
+
def detail
|
87
|
+
"%s\n%s^" % [line, ' ' * line_offset]
|
88
|
+
end
|
89
|
+
|
90
|
+
private
|
91
|
+
|
92
|
+
def string
|
93
|
+
input.string
|
94
|
+
end
|
95
|
+
|
96
|
+
def lines
|
97
|
+
string.send(string.respond_to?(:lines) ? :lines : :to_s).to_a
|
98
|
+
end
|
99
|
+
|
100
|
+
def each_line(&block)
|
101
|
+
string.each_line(&block)
|
102
|
+
end
|
103
|
+
|
104
|
+
# Returns the 0-based number of the line in the input where the error
|
105
|
+
# occurred.
|
106
|
+
def line_index
|
107
|
+
pos = 0
|
108
|
+
idx = 0
|
109
|
+
each_line do |line|
|
110
|
+
pos += line.length
|
111
|
+
return idx if pos >= offset
|
112
|
+
idx += 1
|
113
|
+
end
|
114
|
+
0
|
61
115
|
end
|
62
116
|
end
|
63
117
|
|
@@ -84,6 +138,7 @@ module Citrus
|
|
84
138
|
# exposes Module#include.
|
85
139
|
def self.included(mod)
|
86
140
|
mod.extend(GrammarMethods)
|
141
|
+
# Expose #include so it can be called publicly.
|
87
142
|
class << mod; public :include end
|
88
143
|
end
|
89
144
|
end
|
@@ -91,7 +146,7 @@ module Citrus
|
|
91
146
|
# Contains methods that are available to Grammar modules at the class level.
|
92
147
|
module GrammarMethods
|
93
148
|
def self.extend_object(obj)
|
94
|
-
raise ArgumentError, "Grammars must be
|
149
|
+
raise ArgumentError, "Grammars must be Modules" unless Module === obj
|
95
150
|
super
|
96
151
|
end
|
97
152
|
|
@@ -153,9 +208,9 @@ module Citrus
|
|
153
208
|
# It is important to note that this method will also check any included
|
154
209
|
# grammars for a rule with the given +name+ if one cannot be found in this
|
155
210
|
# grammar.
|
156
|
-
def rule(name, obj=nil)
|
211
|
+
def rule(name, obj=nil, &block)
|
157
212
|
sym = name.to_sym
|
158
|
-
obj =
|
213
|
+
obj = block.call if block
|
159
214
|
|
160
215
|
if obj
|
161
216
|
rule_names << sym unless has_rule?(sym)
|
@@ -256,9 +311,9 @@ module Citrus
|
|
256
311
|
# Specifies a Module that will be used to extend all matches created with
|
257
312
|
# the given +rule+. A block may also be given that will be used to create
|
258
313
|
# an anonymous module. See Rule#ext=.
|
259
|
-
def ext(rule, mod=nil)
|
314
|
+
def ext(rule, mod=nil, &block)
|
260
315
|
rule = Rule.new(rule)
|
261
|
-
mod =
|
316
|
+
mod = block if block
|
262
317
|
rule.extension = mod if mod
|
263
318
|
rule
|
264
319
|
end
|
@@ -273,9 +328,11 @@ module Citrus
|
|
273
328
|
root_rule = rule(opts[:root])
|
274
329
|
raise 'No rule named "%s"' % root unless root_rule
|
275
330
|
|
276
|
-
input = Input.new(string
|
277
|
-
|
331
|
+
input = Input.new(string)
|
332
|
+
input.memoize! if opts[:memoize]
|
333
|
+
input.pos = opts[:offset] if opts[:offset] > 0
|
278
334
|
|
335
|
+
match = input.match(root_rule)
|
279
336
|
if match.nil? || (opts[:consume] && input.length != match.length)
|
280
337
|
raise ParseError.new(input)
|
281
338
|
end
|
@@ -290,10 +347,8 @@ module Citrus
|
|
290
347
|
# root:: The name of the root rule to use for the parse. Defaults
|
291
348
|
# to the name supplied by calling #root.
|
292
349
|
# memoize:: If this is +true+ the matches generated during a parse are
|
293
|
-
# memoized.
|
294
|
-
#
|
295
|
-
# significantly more in terms of time and memory required.
|
296
|
-
# Defaults to +false+.
|
350
|
+
# memoized. See Input#memoize! for more information. Defaults to
|
351
|
+
# +false+.
|
297
352
|
# consume:: If this is +true+ a ParseError will be raised during a parse
|
298
353
|
# unless the entire input string is consumed. Defaults to
|
299
354
|
# +false+.
|
@@ -308,61 +363,80 @@ module Citrus
|
|
308
363
|
|
309
364
|
# This class represents the core of the parsing algorithm. It wraps the input
|
310
365
|
# string and serves matches to all nonterminals.
|
311
|
-
class Input
|
312
|
-
|
313
|
-
|
314
|
-
def initialize(string, memoize=false)
|
315
|
-
@string = string
|
366
|
+
class Input < StringScanner
|
367
|
+
def initialize(string)
|
368
|
+
super(string)
|
316
369
|
@max_offset = 0
|
317
|
-
if memoize
|
318
|
-
@cache = {}
|
319
|
-
@cache_hits = 0
|
320
|
-
end
|
321
370
|
end
|
322
371
|
|
323
|
-
# The
|
324
|
-
attr_reader :string
|
325
|
-
|
326
|
-
# The maximum offset that has been achieved.
|
372
|
+
# The maximum offset that has been achieved during a parse.
|
327
373
|
attr_reader :max_offset
|
328
374
|
|
329
|
-
# A
|
330
|
-
#
|
375
|
+
# A nested hash of rule ids to offsets and their respective matches. Only
|
376
|
+
# present if memoization is enabled.
|
331
377
|
attr_reader :cache
|
332
378
|
|
333
379
|
# The number of times the cache was hit. Only present if memoization is enabled.
|
334
380
|
attr_reader :cache_hits
|
335
381
|
|
336
|
-
# Sends all arguments to this input's +string+.
|
337
|
-
def [](*args)
|
338
|
-
@string.__send__(:[], *args)
|
339
|
-
end
|
340
|
-
|
341
382
|
# Returns the length of this input.
|
342
383
|
def length
|
343
|
-
|
384
|
+
string.length
|
344
385
|
end
|
345
386
|
|
346
|
-
# Returns the match for a given +rule+ at
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
387
|
+
# Returns the match for a given +rule+ at the current position in the input.
|
388
|
+
def match(rule)
|
389
|
+
offset = pos
|
390
|
+
match = rule.match(self)
|
391
|
+
|
392
|
+
if match
|
393
|
+
@max_offset = pos if pos > @max_offset
|
394
|
+
else
|
395
|
+
# Reset the position for the next attempt at a match.
|
396
|
+
self.pos = offset
|
397
|
+
end
|
398
|
+
|
399
|
+
match
|
400
|
+
end
|
353
401
|
|
354
|
-
|
355
|
-
|
402
|
+
# Returns true if this input uses memoization to cache match results. See
|
403
|
+
# #memoize!.
|
404
|
+
def memoized?
|
405
|
+
!! @cache
|
406
|
+
end
|
407
|
+
|
408
|
+
# Modifies this object to cache match results during a parse. This technique
|
409
|
+
# (also known as "Packrat" parsing) guarantees parsers will operate in
|
410
|
+
# linear time but costs significantly more in terms of time and memory
|
411
|
+
# required to perform a parse. For more information, please read the paper
|
412
|
+
# on Packrat parsing at http://pdos.csail.mit.edu/~baford/packrat/icfp02/.
|
413
|
+
def memoize!
|
414
|
+
return if memoized?
|
415
|
+
|
416
|
+
# Using +instance_eval+ here preserves access to +super+ within the
|
417
|
+
# methods we define inside the block.
|
418
|
+
instance_eval do
|
419
|
+
def match(rule)
|
420
|
+
c = @cache[rule.id] ||= {}
|
421
|
+
|
422
|
+
if c.key?(pos)
|
423
|
+
@cache_hits += 1
|
424
|
+
c[pos]
|
425
|
+
else
|
426
|
+
c[pos] = super
|
427
|
+
end
|
428
|
+
end
|
356
429
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
430
|
+
def reset
|
431
|
+
super
|
432
|
+
@max_offset = 0
|
433
|
+
@cache = {}
|
434
|
+
@cache_hits = 0
|
362
435
|
end
|
363
|
-
else
|
364
|
-
rule.match(self, offset)
|
365
436
|
end
|
437
|
+
|
438
|
+
@cache = {}
|
439
|
+
@cache_hits = 0
|
366
440
|
end
|
367
441
|
end
|
368
442
|
|
@@ -380,15 +454,14 @@ module Citrus
|
|
380
454
|
# Returns a new Rule object depending on the type of object given.
|
381
455
|
def self.new(obj)
|
382
456
|
case obj
|
383
|
-
when Rule
|
384
|
-
when Symbol
|
385
|
-
when String
|
386
|
-
when
|
387
|
-
when
|
388
|
-
when
|
389
|
-
when Numeric then FixedWidth.new(obj.to_s)
|
457
|
+
when Rule then obj
|
458
|
+
when Symbol then Alias.new(obj)
|
459
|
+
when String, Regexp then Terminal.new(obj)
|
460
|
+
when Array then Sequence.new(obj)
|
461
|
+
when Range then Choice.new(obj.to_a)
|
462
|
+
when Numeric then Terminal.new(obj.to_s)
|
390
463
|
else
|
391
|
-
raise ArgumentError, "Invalid rule object:
|
464
|
+
raise ArgumentError, "Invalid rule object: %s" % obj.inspect
|
392
465
|
end
|
393
466
|
end
|
394
467
|
|
@@ -466,9 +539,8 @@ module Citrus
|
|
466
539
|
match
|
467
540
|
end
|
468
541
|
|
469
|
-
def create_match(data
|
470
|
-
|
471
|
-
extend_match(match, name)
|
542
|
+
def create_match(data)
|
543
|
+
extend_match(Match.new(data), name)
|
472
544
|
end
|
473
545
|
end
|
474
546
|
|
@@ -496,10 +568,9 @@ module Citrus
|
|
496
568
|
@rule ||= resolve!
|
497
569
|
end
|
498
570
|
|
499
|
-
# Returns the Match for this
|
500
|
-
|
501
|
-
|
502
|
-
m = input.match(rule, offset)
|
571
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
572
|
+
def match(input)
|
573
|
+
m = input.match(rule)
|
503
574
|
extend_match(m, name) if m
|
504
575
|
end
|
505
576
|
end
|
@@ -558,49 +629,15 @@ module Citrus
|
|
558
629
|
end
|
559
630
|
|
560
631
|
# A Terminal is a Rule that matches directly on the input stream and may not
|
561
|
-
# contain any other rule.
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
def initialize(rule)
|
566
|
-
@rule = rule
|
567
|
-
end
|
568
|
-
|
569
|
-
# The actual String or Regexp object this rule uses to match.
|
570
|
-
attr_reader :rule
|
571
|
-
|
572
|
-
# Returns the Citrus notation of this rule as a string.
|
573
|
-
def to_s
|
574
|
-
rule.inspect
|
575
|
-
end
|
576
|
-
end
|
577
|
-
|
578
|
-
# A FixedWidth is a Terminal that matches based on its length. The Citrus
|
579
|
-
# notation is any sequence of characters enclosed in either single or double
|
580
|
-
# quotes, e.g.:
|
632
|
+
# contain any other rule. Terminals may be created from either a String or a
|
633
|
+
# Regexp object. When created from strings, the Citrus notation is any
|
634
|
+
# sequence of characters enclosed in either single or double quotes, e.g.:
|
581
635
|
#
|
582
636
|
# 'expr'
|
583
637
|
# "expr"
|
584
638
|
#
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
def initialize(rule='')
|
589
|
-
raise ArgumentError, "FixedWidth must be a String" unless String === rule
|
590
|
-
super
|
591
|
-
end
|
592
|
-
|
593
|
-
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
|
594
|
-
# no match can be made.
|
595
|
-
def match(input, offset=0)
|
596
|
-
create_match(rule.dup, offset) if input[offset, rule.length] == rule
|
597
|
-
end
|
598
|
-
end
|
599
|
-
|
600
|
-
# An Expression is a Terminal that has the same semantics as a regular
|
601
|
-
# expression in Ruby. The expression must match at the beginning of the input
|
602
|
-
# (index 0). The Citrus notation is identical to Ruby's regular expression
|
603
|
-
# notation, e.g.:
|
639
|
+
# When created from a regular expression, the Citrus notation is identical to
|
640
|
+
# Ruby's regular expression notation, e.g.:
|
604
641
|
#
|
605
642
|
# /expr/
|
606
643
|
#
|
@@ -610,19 +647,34 @@ module Citrus
|
|
610
647
|
# [a-zA-Z]
|
611
648
|
# .
|
612
649
|
#
|
613
|
-
class
|
614
|
-
include
|
650
|
+
class Terminal
|
651
|
+
include Rule
|
615
652
|
|
616
|
-
def initialize(rule
|
617
|
-
|
618
|
-
|
653
|
+
def initialize(rule='')
|
654
|
+
case rule
|
655
|
+
when String
|
656
|
+
@string = rule
|
657
|
+
@rule = Regexp.new(Regexp.escape(rule))
|
658
|
+
when Regexp
|
659
|
+
@rule = rule
|
660
|
+
else
|
661
|
+
raise ArgumentError, "Cannot create terminal from object: %s" %
|
662
|
+
rule.inspect
|
663
|
+
end
|
619
664
|
end
|
620
665
|
|
621
|
-
#
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
666
|
+
# The actual Regexp object this rule uses to match.
|
667
|
+
attr_reader :rule
|
668
|
+
|
669
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
670
|
+
def match(input)
|
671
|
+
m = input.scan(@rule)
|
672
|
+
create_match(m) if m
|
673
|
+
end
|
674
|
+
|
675
|
+
# Returns the Citrus notation of this rule as a string.
|
676
|
+
def to_s
|
677
|
+
(@string || @rule).inspect
|
626
678
|
end
|
627
679
|
end
|
628
680
|
|
@@ -669,10 +721,9 @@ module Citrus
|
|
669
721
|
class AndPredicate
|
670
722
|
include Predicate
|
671
723
|
|
672
|
-
# Returns the Match for this rule on +input+
|
673
|
-
|
674
|
-
|
675
|
-
create_match('', offset) if input.match(rule, offset)
|
724
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
725
|
+
def match(input)
|
726
|
+
create_match('') if input.match(rule)
|
676
727
|
end
|
677
728
|
|
678
729
|
# Returns the Citrus notation of this rule as a string.
|
@@ -690,10 +741,9 @@ module Citrus
|
|
690
741
|
class NotPredicate
|
691
742
|
include Predicate
|
692
743
|
|
693
|
-
# Returns the Match for this rule on +input+
|
694
|
-
|
695
|
-
|
696
|
-
create_match('', offset) unless input.match(rule, offset)
|
744
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
745
|
+
def match(input)
|
746
|
+
create_match('') unless input.match(rule)
|
697
747
|
end
|
698
748
|
|
699
749
|
# Returns the Citrus notation of this rule as a string.
|
@@ -713,19 +763,16 @@ module Citrus
|
|
713
763
|
|
714
764
|
DOT_RULE = Rule.new(DOT)
|
715
765
|
|
716
|
-
# Returns the Match for this rule on +input+
|
717
|
-
|
718
|
-
def match(input, offset=0)
|
766
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
767
|
+
def match(input)
|
719
768
|
matches = []
|
720
|
-
|
721
|
-
|
722
|
-
m = input.match(DOT_RULE, os)
|
769
|
+
while input.match(rule).nil?
|
770
|
+
m = input.match(DOT_RULE)
|
723
771
|
break unless m
|
724
772
|
matches << m
|
725
|
-
os += m.length
|
726
773
|
end
|
727
774
|
# Create a single match from the aggregate text value of all submatches.
|
728
|
-
create_match(matches.join
|
775
|
+
create_match(matches.join) if matches.any?
|
729
776
|
end
|
730
777
|
|
731
778
|
# Returns the Citrus notation of this rule as a string.
|
@@ -757,11 +804,11 @@ module Citrus
|
|
757
804
|
# The label this rule adds to all its matches.
|
758
805
|
attr_reader :label
|
759
806
|
|
760
|
-
# Returns the Match for this rule on +input+
|
761
|
-
#
|
762
|
-
#
|
763
|
-
def match(input
|
764
|
-
m = input.match(rule
|
807
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
808
|
+
# When a Label makes a match, it re-names the match to the value of its
|
809
|
+
# #label.
|
810
|
+
def match(input)
|
811
|
+
m = input.match(rule)
|
765
812
|
extend_match(m, label) if m
|
766
813
|
end
|
767
814
|
|
@@ -799,18 +846,15 @@ module Citrus
|
|
799
846
|
@range = Range.new(min, max)
|
800
847
|
end
|
801
848
|
|
802
|
-
# Returns the Match for this rule on +input+
|
803
|
-
|
804
|
-
def match(input, offset=0)
|
849
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
850
|
+
def match(input)
|
805
851
|
matches = []
|
806
|
-
os = offset
|
807
852
|
while matches.length < @range.end
|
808
|
-
m = input.match(rule
|
853
|
+
m = input.match(rule)
|
809
854
|
break unless m
|
810
855
|
matches << m
|
811
|
-
os += m.length
|
812
856
|
end
|
813
|
-
create_match(matches
|
857
|
+
create_match(matches) if @range.include?(matches.length)
|
814
858
|
end
|
815
859
|
|
816
860
|
# The minimum number of times this rule must match.
|
@@ -846,6 +890,7 @@ module Citrus
|
|
846
890
|
module List
|
847
891
|
include Nonterminal
|
848
892
|
|
893
|
+
# See Rule#paren?.
|
849
894
|
def paren?
|
850
895
|
rules.length > 1
|
851
896
|
end
|
@@ -859,11 +904,10 @@ module Citrus
|
|
859
904
|
class Choice
|
860
905
|
include List
|
861
906
|
|
862
|
-
# Returns the Match for this rule on +input+
|
863
|
-
|
864
|
-
def match(input, offset=0)
|
907
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
908
|
+
def match(input)
|
865
909
|
rules.each do |rule|
|
866
|
-
m = input.match(rule
|
910
|
+
m = input.match(rule)
|
867
911
|
return extend_match(m, name) if m
|
868
912
|
end
|
869
913
|
nil
|
@@ -883,18 +927,15 @@ module Citrus
|
|
883
927
|
class Sequence
|
884
928
|
include List
|
885
929
|
|
886
|
-
# Returns the Match for this rule on +input+
|
887
|
-
|
888
|
-
def match(input, offset=0)
|
930
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
931
|
+
def match(input)
|
889
932
|
matches = []
|
890
|
-
os = offset
|
891
933
|
rules.each do |rule|
|
892
|
-
m = input.match(rule
|
934
|
+
m = input.match(rule)
|
893
935
|
break unless m
|
894
936
|
matches << m
|
895
|
-
os += m.length
|
896
937
|
end
|
897
|
-
create_match(matches
|
938
|
+
create_match(matches) if matches.length == rules.length
|
898
939
|
end
|
899
940
|
|
900
941
|
# Returns the Citrus notation of this rule as a string.
|
@@ -907,24 +948,19 @@ module Citrus
|
|
907
948
|
# match may contain any number of other matches. This class provides several
|
908
949
|
# convenient tree traversal methods that help when examining parse results.
|
909
950
|
class Match < String
|
910
|
-
def initialize(data
|
951
|
+
def initialize(data)
|
911
952
|
case data
|
912
953
|
when String
|
913
954
|
super(data)
|
914
|
-
when MatchData
|
915
|
-
super(data[0])
|
916
|
-
@captures = data.captures
|
917
955
|
when Array
|
918
956
|
super(data.join)
|
919
957
|
@matches = data
|
958
|
+
else
|
959
|
+
raise ArgumentError, "Cannot create match from object: %s" %
|
960
|
+
data.inspect
|
920
961
|
end
|
921
|
-
|
922
|
-
@offset = offset
|
923
962
|
end
|
924
963
|
|
925
|
-
# The offset in the input at which this match occurred.
|
926
|
-
attr_reader :offset
|
927
|
-
|
928
964
|
# An array of all names of this match. A name is added to a match object
|
929
965
|
# for each rule that returns that object when matching. These names can then
|
930
966
|
# be used to determine which rules were satisfied by a given match.
|
@@ -947,12 +983,6 @@ module Citrus
|
|
947
983
|
@matches ||= []
|
948
984
|
end
|
949
985
|
|
950
|
-
# An array of substrings returned by MatchData#captures if this match was
|
951
|
-
# created by an Expression.
|
952
|
-
def captures
|
953
|
-
@captures ||= []
|
954
|
-
end
|
955
|
-
|
956
986
|
# Returns an array of all sub-matches with the given +name+. If +deep+ is
|
957
987
|
# +false+, returns only sub-matches that are immediate descendants of this
|
958
988
|
# match.
|