citrus 1.8.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +53 -46
- data/benchmark/after.dat +192 -0
- data/benchmark/before.dat +192 -0
- data/benchmark/master.dat +192 -0
- data/doc/background.markdown +9 -10
- data/doc/example.markdown +24 -15
- data/doc/syntax.markdown +20 -21
- data/lib/citrus.rb +208 -178
- data/lib/citrus/debug.rb +34 -4
- data/test/file_test.rb +12 -12
- data/test/helper.rb +27 -5
- data/test/match_test.rb +18 -34
- data/test/parse_error_test.rb +56 -0
- data/test/terminal_test.rb +56 -0
- metadata +10 -7
- data/test/expression_test.rb +0 -29
- data/test/fixed_width_test.rb +0 -37
@@ -0,0 +1,192 @@
|
|
1
|
+
12 0
|
2
|
+
30 0
|
3
|
+
35 0
|
4
|
+
1749 19
|
5
|
+
1962 49
|
6
|
+
2383 10
|
7
|
+
3728 49
|
8
|
+
3919 59
|
9
|
+
3952 60
|
10
|
+
3995 49
|
11
|
+
4063 50
|
12
|
+
4325 70
|
13
|
+
4527 60
|
14
|
+
4570 60
|
15
|
+
4607 70
|
16
|
+
4654 59
|
17
|
+
4679 70
|
18
|
+
4774 59
|
19
|
+
4968 70
|
20
|
+
5059 39
|
21
|
+
5383 39
|
22
|
+
5915 70
|
23
|
+
6109 70
|
24
|
+
6122 80
|
25
|
+
6218 90
|
26
|
+
6332 70
|
27
|
+
6681 59
|
28
|
+
7440 90
|
29
|
+
7530 80
|
30
|
+
7605 79
|
31
|
+
8155 89
|
32
|
+
8402 100
|
33
|
+
8420 100
|
34
|
+
8617 110
|
35
|
+
8635 109
|
36
|
+
8841 120
|
37
|
+
8843 109
|
38
|
+
8852 100
|
39
|
+
9151 99
|
40
|
+
9271 89
|
41
|
+
9521 99
|
42
|
+
9525 89
|
43
|
+
9566 100
|
44
|
+
9584 109
|
45
|
+
9642 110
|
46
|
+
10138 130
|
47
|
+
10181 109
|
48
|
+
10225 109
|
49
|
+
10338 110
|
50
|
+
10449 99
|
51
|
+
10629 100
|
52
|
+
10763 129
|
53
|
+
10817 130
|
54
|
+
11059 120
|
55
|
+
11062 120
|
56
|
+
11215 119
|
57
|
+
11698 129
|
58
|
+
11891 120
|
59
|
+
11945 139
|
60
|
+
11956 129
|
61
|
+
12018 150
|
62
|
+
12053 109
|
63
|
+
12178 140
|
64
|
+
12283 119
|
65
|
+
12326 99
|
66
|
+
12430 119
|
67
|
+
12438 130
|
68
|
+
12572 130
|
69
|
+
12638 129
|
70
|
+
12687 130
|
71
|
+
12703 129
|
72
|
+
12896 129
|
73
|
+
12922 120
|
74
|
+
12996 129
|
75
|
+
13137 170
|
76
|
+
13211 140
|
77
|
+
13462 140
|
78
|
+
13477 129
|
79
|
+
13576 140
|
80
|
+
13577 140
|
81
|
+
13584 129
|
82
|
+
13605 130
|
83
|
+
13631 160
|
84
|
+
14216 150
|
85
|
+
14237 139
|
86
|
+
14260 130
|
87
|
+
14367 129
|
88
|
+
14371 160
|
89
|
+
14741 170
|
90
|
+
14893 139
|
91
|
+
14910 160
|
92
|
+
14917 149
|
93
|
+
14977 149
|
94
|
+
15049 150
|
95
|
+
15191 179
|
96
|
+
15382 150
|
97
|
+
15618 179
|
98
|
+
15623 179
|
99
|
+
15629 150
|
100
|
+
15856 170
|
101
|
+
16496 200
|
102
|
+
16512 160
|
103
|
+
16956 179
|
104
|
+
17074 149
|
105
|
+
17237 189
|
106
|
+
17371 170
|
107
|
+
17568 189
|
108
|
+
17945 199
|
109
|
+
18147 190
|
110
|
+
18343 210
|
111
|
+
18417 199
|
112
|
+
18823 200
|
113
|
+
18970 210
|
114
|
+
19285 220
|
115
|
+
19333 189
|
116
|
+
19500 199
|
117
|
+
19548 219
|
118
|
+
19634 189
|
119
|
+
19673 209
|
120
|
+
19689 209
|
121
|
+
19909 199
|
122
|
+
20054 199
|
123
|
+
20107 229
|
124
|
+
20248 219
|
125
|
+
20580 219
|
126
|
+
20744 230
|
127
|
+
20806 210
|
128
|
+
20954 230
|
129
|
+
21034 209
|
130
|
+
21187 199
|
131
|
+
21303 239
|
132
|
+
21450 209
|
133
|
+
21626 189
|
134
|
+
21931 289
|
135
|
+
21950 230
|
136
|
+
22359 250
|
137
|
+
22626 209
|
138
|
+
22638 279
|
139
|
+
22772 250
|
140
|
+
22885 259
|
141
|
+
22897 240
|
142
|
+
23114 330
|
143
|
+
23242 229
|
144
|
+
23428 279
|
145
|
+
23452 240
|
146
|
+
23495 250
|
147
|
+
23499 260
|
148
|
+
23558 270
|
149
|
+
23744 220
|
150
|
+
23881 240
|
151
|
+
23945 269
|
152
|
+
24361 250
|
153
|
+
24501 240
|
154
|
+
24642 269
|
155
|
+
24672 249
|
156
|
+
24694 219
|
157
|
+
24706 270
|
158
|
+
24931 290
|
159
|
+
25065 300
|
160
|
+
25140 290
|
161
|
+
25402 250
|
162
|
+
25702 289
|
163
|
+
25743 290
|
164
|
+
27139 359
|
165
|
+
27316 299
|
166
|
+
27333 290
|
167
|
+
27414 309
|
168
|
+
27771 309
|
169
|
+
27798 259
|
170
|
+
28583 320
|
171
|
+
28906 309
|
172
|
+
29025 360
|
173
|
+
29209 310
|
174
|
+
29272 280
|
175
|
+
29273 310
|
176
|
+
29359 270
|
177
|
+
29577 330
|
178
|
+
30886 359
|
179
|
+
31170 359
|
180
|
+
31593 330
|
181
|
+
32460 369
|
182
|
+
32486 390
|
183
|
+
32630 399
|
184
|
+
33010 440
|
185
|
+
33137 419
|
186
|
+
33142 389
|
187
|
+
33739 340
|
188
|
+
39880 470
|
189
|
+
39940 460
|
190
|
+
42952 489
|
191
|
+
43227 500
|
192
|
+
52855 640
|
data/doc/background.markdown
CHANGED
@@ -29,9 +29,9 @@ A [Rule](api/classes/Citrus/Rule.html) is an object that specifies some matching
|
|
29
29
|
behavior on a string. There are two types of rules: terminals and non-terminals.
|
30
30
|
Terminals can be either Ruby strings or regular expressions that specify some
|
31
31
|
input to match. For example, a terminal created from the string "end" would
|
32
|
-
match any sequence of the characters "e", "n", and "d", in that order.
|
33
|
-
|
34
|
-
|
32
|
+
match any sequence of the characters "e", "n", and "d", in that order. Terminals
|
33
|
+
created from regular expressions may match any sequence of characters that can
|
34
|
+
be generated from that expression.
|
35
35
|
|
36
36
|
Non-terminals are rules that may contain other rules but do not themselves match
|
37
37
|
directly on the input. For example, a Repeat is a non-terminal that may contain
|
@@ -58,10 +58,10 @@ similar to Ruby's super keyword.
|
|
58
58
|
## Matches
|
59
59
|
|
60
60
|
Matches are created by rule objects when they match on the input. A
|
61
|
-
[Match](api/classes/Citrus/Match.html)
|
62
|
-
[String](http://ruby-doc.org/core/classes/String.html) with some extra
|
63
|
-
information attached such as the name(s) of the rule(s) which
|
64
|
-
|
61
|
+
[Match](api/classes/Citrus/Match.html) is actually a
|
62
|
+
[String](http://ruby-doc.org/core/classes/String.html) object with some extra
|
63
|
+
information attached such as the name(s) of the rule(s) from which it was
|
64
|
+
generated and any submatches it may contain.
|
65
65
|
|
66
66
|
During a parse, matches are arranged in a tree structure where any match may
|
67
67
|
contain any number of other matches. This structure is determined by the way in
|
@@ -70,6 +70,5 @@ match that is created from a non-terminal rule that contains several other
|
|
70
70
|
terminals will likewise contain several matches, one for each terminal.
|
71
71
|
|
72
72
|
Match objects may be extended with semantic information in the form of methods.
|
73
|
-
These methods
|
74
|
-
|
75
|
-
and any submatches.
|
73
|
+
These methods should provide various interpretations for the semantic value of a
|
74
|
+
match.
|
data/doc/example.markdown
CHANGED
@@ -47,9 +47,9 @@ Submatches are created whenever a rule contains another rule. For example, in
|
|
47
47
|
the grammar above the number rule matches a string of digits followed by white
|
48
48
|
space. Thus, a match generated by the number rule will contain two submatches.
|
49
49
|
|
50
|
-
We can
|
51
|
-
matches when they are created
|
52
|
-
|
50
|
+
We can define methods inside a set of curly braces that will be used to extend
|
51
|
+
matches when they are created. This works in similar fashion to using Ruby's
|
52
|
+
blocks. Let's extend the `Addition` grammar using this technique.
|
53
53
|
|
54
54
|
grammar Addition
|
55
55
|
rule additive
|
@@ -83,25 +83,27 @@ on all match objects that result from matches of those particular rules. It's
|
|
83
83
|
easiest to explain what is going on here by starting with the lowest level
|
84
84
|
block, which is defined within the number rule.
|
85
85
|
|
86
|
-
The semantic block associated with the number rule defines one method, value
|
86
|
+
The semantic block associated with the number rule defines one method, `value`.
|
87
87
|
Inside this method, we can see that the value of a number match is determined to
|
88
|
-
be its text value, stripped of white space and converted to an integer.
|
89
|
-
that matches are simply strings, so the `strip`
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
88
|
+
be its text value, stripped of white space and converted to an integer.
|
89
|
+
[Remember](background.html) that matches are simply strings, so the `strip`
|
90
|
+
method in this case is actually
|
91
|
+
[String#strip](http://ruby-doc.org/core/classes/String.html#M000820).
|
92
|
+
|
93
|
+
The `additive` rule also extends its matches with a `value` method. Notice the
|
94
|
+
use of the `term` label within the rule definition. This label allows the match
|
95
|
+
that is created by either the additive or the number rule to be retrieved using
|
96
|
+
the `term` label. The value of an additive is determined to be the values of its
|
96
97
|
`number` and `term` matches added together using Ruby's addition operator.
|
97
98
|
|
98
99
|
Since additive is the first rule defined in the grammar, any match that results
|
99
100
|
from parsing a string with this grammar will have a `value` method that can be
|
100
101
|
used to recursively calculate the collective value of the entire match tree.
|
101
102
|
|
102
|
-
To give it a try, save the code for the Addition grammar in a file called
|
103
|
-
addition.citrus. Next, assuming you have the Citrus
|
104
|
-
|
103
|
+
To give it a try, save the code for the `Addition` grammar in a file called
|
104
|
+
addition.citrus. Next, assuming you have the Citrus
|
105
|
+
[gem](https://rubygems.org/gems/citrus) installed, try the following sequence of
|
106
|
+
commands in a terminal.
|
105
107
|
|
106
108
|
$ irb
|
107
109
|
> require 'citrus'
|
@@ -115,6 +117,13 @@ following sequence of commands in a terminal.
|
|
115
117
|
|
116
118
|
Congratulations! You just ran your first piece of Citrus code.
|
117
119
|
|
120
|
+
One interesting thing to notice about the above sequence of commands is the
|
121
|
+
return value of [Citrus#load](api/classes/Citrus.html#M000003). When you use
|
122
|
+
`Citrus.load` to
|
123
|
+
load a grammar file (and likewise [Citrus#eval](api/classes/Citrus.html#M000004) to evaluate
|
124
|
+
a raw string of grammar code), the return value is an array of all the grammars
|
125
|
+
present in that file.
|
126
|
+
|
118
127
|
Take a look at
|
119
128
|
[examples/calc.citrus](http://github.com/mjijackson/citrus/blob/master/examples/calc.citrus)
|
120
129
|
for an example of a calculator that is able to parse and evaluate more complex
|
data/doc/syntax.markdown
CHANGED
@@ -21,8 +21,7 @@ compatibility with other parsing expression implementations.
|
|
21
21
|
[\x00-\xFF] # match any octet
|
22
22
|
. # match anything, even new lines
|
23
23
|
|
24
|
-
See [
|
25
|
-
[Expression](api/classes/Citrus/Expression.html) for more information.
|
24
|
+
See [Terminal](api/classes/Citrus/Terminal.html) for more information.
|
26
25
|
|
27
26
|
## Repetition
|
28
27
|
|
@@ -108,22 +107,22 @@ See [Label](api/classes/Citrus/Label.html) for more information.
|
|
108
107
|
The following table contains a list of all Citrus operators and their
|
109
108
|
precedence. A higher precedence indicates tighter binding.
|
110
109
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
128
|
-
|
129
|
-
|
110
|
+
Operator | Name | Precedence
|
111
|
+
------------------------- | ------------------------- | ----------
|
112
|
+
`''` | String (single quoted) | 6
|
113
|
+
`""` | String (double quoted) | 6
|
114
|
+
`[]` | Character class | 6
|
115
|
+
`.` | Dot (any character) | 6
|
116
|
+
`//` | Regular expression | 6
|
117
|
+
`()` | Grouping | 6
|
118
|
+
`*` | Repetition (arbitrary) | 5
|
119
|
+
`+` | Repetition (one or more) | 5
|
120
|
+
`?` | Repetition (zero or one) | 5
|
121
|
+
`&` | And predicate | 4
|
122
|
+
`!` | Not predicate | 4
|
123
|
+
`~` | But predicate | 4
|
124
|
+
`:` | Label | 4
|
125
|
+
`<>` | Extension (module name) | 3
|
126
|
+
`{}` | Extension (literal) | 3
|
127
|
+
`e1 e2` | Sequence | 2
|
128
|
+
<code>e1 | e2</code> | Ordered choice | 1
|
data/lib/citrus.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
1
3
|
# Citrus is a compact and powerful parsing library for Ruby that combines the
|
2
4
|
# elegance and expressiveness of the language with the simplicity and power of
|
3
5
|
# parsing expressions.
|
@@ -6,14 +8,14 @@
|
|
6
8
|
module Citrus
|
7
9
|
autoload :File, 'citrus/file'
|
8
10
|
|
9
|
-
VERSION = [
|
11
|
+
VERSION = [2, 0, 0]
|
10
12
|
|
11
13
|
# Returns the current version of Citrus as a string.
|
12
14
|
def self.version
|
13
15
|
VERSION.join('.')
|
14
16
|
end
|
15
17
|
|
16
|
-
# A pattern to match any character, including
|
18
|
+
# A pattern to match any character, including \\n.
|
17
19
|
DOT = /./m
|
18
20
|
|
19
21
|
Infinity = 1.0 / 0
|
@@ -39,25 +41,77 @@ module Citrus
|
|
39
41
|
class ParseError < Exception
|
40
42
|
def initialize(input)
|
41
43
|
@input = input
|
42
|
-
|
43
|
-
|
44
|
-
msg = "Failed to parse input at offset %d" % max_offset
|
45
|
-
msg += ", just after %s" % c[s, c.length].inspect + "\n"
|
44
|
+
msg = "Failed to parse input at offset %d" % offset
|
45
|
+
msg << detail
|
46
46
|
super(msg)
|
47
47
|
end
|
48
48
|
|
49
49
|
# The Input object that was used for the parse.
|
50
50
|
attr_reader :input
|
51
51
|
|
52
|
-
# Returns the
|
53
|
-
|
52
|
+
# Returns the 0-based offset at which the error occurred in the input, i.e.
|
53
|
+
# the maximum offset in the input that was successfully parsed before the
|
54
|
+
# error occurred.
|
55
|
+
def offset
|
54
56
|
input.max_offset
|
55
57
|
end
|
56
58
|
|
57
|
-
# Returns the
|
58
|
-
|
59
|
-
|
60
|
-
|
59
|
+
# Returns the text of the line on which the error occurred.
|
60
|
+
def line
|
61
|
+
lines[line_index]
|
62
|
+
end
|
63
|
+
|
64
|
+
# Returns the 1-based number of the line in the input where the error
|
65
|
+
# occurred.
|
66
|
+
def line_number
|
67
|
+
line_index + 1
|
68
|
+
end
|
69
|
+
|
70
|
+
alias lineno line_number
|
71
|
+
|
72
|
+
# Returns the 0-based offset at which the error occurred on the line on
|
73
|
+
# which it occurred.
|
74
|
+
def line_offset
|
75
|
+
pos = 0
|
76
|
+
each_line do |line|
|
77
|
+
len = line.length
|
78
|
+
return (offset - pos) if pos + len >= offset
|
79
|
+
pos += len
|
80
|
+
end
|
81
|
+
0
|
82
|
+
end
|
83
|
+
|
84
|
+
# Returns a string that, when printed, gives a visual representation of
|
85
|
+
# exactly where the error occurred on its line in the input.
|
86
|
+
def detail
|
87
|
+
"%s\n%s^" % [line, ' ' * line_offset]
|
88
|
+
end
|
89
|
+
|
90
|
+
private
|
91
|
+
|
92
|
+
def string
|
93
|
+
input.string
|
94
|
+
end
|
95
|
+
|
96
|
+
def lines
|
97
|
+
string.send(string.respond_to?(:lines) ? :lines : :to_s).to_a
|
98
|
+
end
|
99
|
+
|
100
|
+
def each_line(&block)
|
101
|
+
string.each_line(&block)
|
102
|
+
end
|
103
|
+
|
104
|
+
# Returns the 0-based number of the line in the input where the error
|
105
|
+
# occurred.
|
106
|
+
def line_index
|
107
|
+
pos = 0
|
108
|
+
idx = 0
|
109
|
+
each_line do |line|
|
110
|
+
pos += line.length
|
111
|
+
return idx if pos >= offset
|
112
|
+
idx += 1
|
113
|
+
end
|
114
|
+
0
|
61
115
|
end
|
62
116
|
end
|
63
117
|
|
@@ -84,6 +138,7 @@ module Citrus
|
|
84
138
|
# exposes Module#include.
|
85
139
|
def self.included(mod)
|
86
140
|
mod.extend(GrammarMethods)
|
141
|
+
# Expose #include so it can be called publicly.
|
87
142
|
class << mod; public :include end
|
88
143
|
end
|
89
144
|
end
|
@@ -91,7 +146,7 @@ module Citrus
|
|
91
146
|
# Contains methods that are available to Grammar modules at the class level.
|
92
147
|
module GrammarMethods
|
93
148
|
def self.extend_object(obj)
|
94
|
-
raise ArgumentError, "Grammars must be
|
149
|
+
raise ArgumentError, "Grammars must be Modules" unless Module === obj
|
95
150
|
super
|
96
151
|
end
|
97
152
|
|
@@ -153,9 +208,9 @@ module Citrus
|
|
153
208
|
# It is important to note that this method will also check any included
|
154
209
|
# grammars for a rule with the given +name+ if one cannot be found in this
|
155
210
|
# grammar.
|
156
|
-
def rule(name, obj=nil)
|
211
|
+
def rule(name, obj=nil, &block)
|
157
212
|
sym = name.to_sym
|
158
|
-
obj =
|
213
|
+
obj = block.call if block
|
159
214
|
|
160
215
|
if obj
|
161
216
|
rule_names << sym unless has_rule?(sym)
|
@@ -256,9 +311,9 @@ module Citrus
|
|
256
311
|
# Specifies a Module that will be used to extend all matches created with
|
257
312
|
# the given +rule+. A block may also be given that will be used to create
|
258
313
|
# an anonymous module. See Rule#ext=.
|
259
|
-
def ext(rule, mod=nil)
|
314
|
+
def ext(rule, mod=nil, &block)
|
260
315
|
rule = Rule.new(rule)
|
261
|
-
mod =
|
316
|
+
mod = block if block
|
262
317
|
rule.extension = mod if mod
|
263
318
|
rule
|
264
319
|
end
|
@@ -273,9 +328,11 @@ module Citrus
|
|
273
328
|
root_rule = rule(opts[:root])
|
274
329
|
raise 'No rule named "%s"' % root unless root_rule
|
275
330
|
|
276
|
-
input = Input.new(string
|
277
|
-
|
331
|
+
input = Input.new(string)
|
332
|
+
input.memoize! if opts[:memoize]
|
333
|
+
input.pos = opts[:offset] if opts[:offset] > 0
|
278
334
|
|
335
|
+
match = input.match(root_rule)
|
279
336
|
if match.nil? || (opts[:consume] && input.length != match.length)
|
280
337
|
raise ParseError.new(input)
|
281
338
|
end
|
@@ -290,10 +347,8 @@ module Citrus
|
|
290
347
|
# root:: The name of the root rule to use for the parse. Defaults
|
291
348
|
# to the name supplied by calling #root.
|
292
349
|
# memoize:: If this is +true+ the matches generated during a parse are
|
293
|
-
# memoized.
|
294
|
-
#
|
295
|
-
# significantly more in terms of time and memory required.
|
296
|
-
# Defaults to +false+.
|
350
|
+
# memoized. See Input#memoize! for more information. Defaults to
|
351
|
+
# +false+.
|
297
352
|
# consume:: If this is +true+ a ParseError will be raised during a parse
|
298
353
|
# unless the entire input string is consumed. Defaults to
|
299
354
|
# +false+.
|
@@ -308,61 +363,80 @@ module Citrus
|
|
308
363
|
|
309
364
|
# This class represents the core of the parsing algorithm. It wraps the input
|
310
365
|
# string and serves matches to all nonterminals.
|
311
|
-
class Input
|
312
|
-
|
313
|
-
|
314
|
-
def initialize(string, memoize=false)
|
315
|
-
@string = string
|
366
|
+
class Input < StringScanner
|
367
|
+
def initialize(string)
|
368
|
+
super(string)
|
316
369
|
@max_offset = 0
|
317
|
-
if memoize
|
318
|
-
@cache = {}
|
319
|
-
@cache_hits = 0
|
320
|
-
end
|
321
370
|
end
|
322
371
|
|
323
|
-
# The
|
324
|
-
attr_reader :string
|
325
|
-
|
326
|
-
# The maximum offset that has been achieved.
|
372
|
+
# The maximum offset that has been achieved during a parse.
|
327
373
|
attr_reader :max_offset
|
328
374
|
|
329
|
-
# A
|
330
|
-
#
|
375
|
+
# A nested hash of rule id's to offsets and their respective matches. Only
|
376
|
+
# present if memoing is enabled.
|
331
377
|
attr_reader :cache
|
332
378
|
|
333
379
|
# The number of times the cache was hit. Only present if memoing is enabled.
|
334
380
|
attr_reader :cache_hits
|
335
381
|
|
336
|
-
# Sends all arguments to this input's +string+.
|
337
|
-
def [](*args)
|
338
|
-
@string.__send__(:[], *args)
|
339
|
-
end
|
340
|
-
|
341
382
|
# Returns the length of this input.
|
342
383
|
def length
|
343
|
-
|
384
|
+
string.length
|
344
385
|
end
|
345
386
|
|
346
|
-
# Returns the match for a given +rule+ at
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
387
|
+
# Returns the match for a given +rule+ at the current position in the input.
|
388
|
+
def match(rule)
|
389
|
+
offset = pos
|
390
|
+
match = rule.match(self)
|
391
|
+
|
392
|
+
if match
|
393
|
+
@max_offset = pos if pos > @max_offset
|
394
|
+
else
|
395
|
+
# Reset the position for the next attempt at a match.
|
396
|
+
self.pos = offset
|
397
|
+
end
|
398
|
+
|
399
|
+
match
|
400
|
+
end
|
353
401
|
|
354
|
-
|
355
|
-
|
402
|
+
# Returns true if this input uses memoization to cache match results. See
|
403
|
+
# #memoize!.
|
404
|
+
def memoized?
|
405
|
+
!! @cache
|
406
|
+
end
|
407
|
+
|
408
|
+
# Modifies this object to cache match results during a parse. This technique
|
409
|
+
# (also known as "Packrat" parsing) guarantees parsers will operate in
|
410
|
+
# linear time but costs significantly more in terms of time and memory
|
411
|
+
# required to perform a parse. For more information, please read the paper
|
412
|
+
# on Packrat parsing at http://pdos.csail.mit.edu/~baford/packrat/icfp02/.
|
413
|
+
def memoize!
|
414
|
+
return if memoized?
|
415
|
+
|
416
|
+
# Using +instance_eval+ here preserves access to +super+ within the
|
417
|
+
# methods we define inside the block.
|
418
|
+
instance_eval do
|
419
|
+
def match(rule)
|
420
|
+
c = @cache[rule.id] ||= {}
|
421
|
+
|
422
|
+
if c.key?(pos)
|
423
|
+
@cache_hits += 1
|
424
|
+
c[pos]
|
425
|
+
else
|
426
|
+
c[pos] = super
|
427
|
+
end
|
428
|
+
end
|
356
429
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
430
|
+
def reset
|
431
|
+
super
|
432
|
+
@max_offset = 0
|
433
|
+
@cache = {}
|
434
|
+
@cache_hits = 0
|
362
435
|
end
|
363
|
-
else
|
364
|
-
rule.match(self, offset)
|
365
436
|
end
|
437
|
+
|
438
|
+
@cache = {}
|
439
|
+
@cache_hits = 0
|
366
440
|
end
|
367
441
|
end
|
368
442
|
|
@@ -380,15 +454,14 @@ module Citrus
|
|
380
454
|
# Returns a new Rule object depending on the type of object given.
|
381
455
|
def self.new(obj)
|
382
456
|
case obj
|
383
|
-
when Rule
|
384
|
-
when Symbol
|
385
|
-
when String
|
386
|
-
when
|
387
|
-
when
|
388
|
-
when
|
389
|
-
when Numeric then FixedWidth.new(obj.to_s)
|
457
|
+
when Rule then obj
|
458
|
+
when Symbol then Alias.new(obj)
|
459
|
+
when String, Regexp then Terminal.new(obj)
|
460
|
+
when Array then Sequence.new(obj)
|
461
|
+
when Range then Choice.new(obj.to_a)
|
462
|
+
when Numeric then Terminal.new(obj.to_s)
|
390
463
|
else
|
391
|
-
raise ArgumentError, "Invalid rule object:
|
464
|
+
raise ArgumentError, "Invalid rule object: %s" % obj.inspect
|
392
465
|
end
|
393
466
|
end
|
394
467
|
|
@@ -466,9 +539,8 @@ module Citrus
|
|
466
539
|
match
|
467
540
|
end
|
468
541
|
|
469
|
-
def create_match(data
|
470
|
-
|
471
|
-
extend_match(match, name)
|
542
|
+
def create_match(data)
|
543
|
+
extend_match(Match.new(data), name)
|
472
544
|
end
|
473
545
|
end
|
474
546
|
|
@@ -496,10 +568,9 @@ module Citrus
|
|
496
568
|
@rule ||= resolve!
|
497
569
|
end
|
498
570
|
|
499
|
-
# Returns the Match for this
|
500
|
-
|
501
|
-
|
502
|
-
m = input.match(rule, offset)
|
571
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
572
|
+
def match(input)
|
573
|
+
m = input.match(rule)
|
503
574
|
extend_match(m, name) if m
|
504
575
|
end
|
505
576
|
end
|
@@ -558,49 +629,15 @@ module Citrus
|
|
558
629
|
end
|
559
630
|
|
560
631
|
# A Terminal is a Rule that matches directly on the input stream and may not
|
561
|
-
# contain any other rule.
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
def initialize(rule)
|
566
|
-
@rule = rule
|
567
|
-
end
|
568
|
-
|
569
|
-
# The actual String or Regexp object this rule uses to match.
|
570
|
-
attr_reader :rule
|
571
|
-
|
572
|
-
# Returns the Citrus notation of this rule as a string.
|
573
|
-
def to_s
|
574
|
-
rule.inspect
|
575
|
-
end
|
576
|
-
end
|
577
|
-
|
578
|
-
# A FixedWidth is a Terminal that matches based on its length. The Citrus
|
579
|
-
# notation is any sequence of characters enclosed in either single or double
|
580
|
-
# quotes, e.g.:
|
632
|
+
# contain any other rule. Terminals may be created from either a String or a
|
633
|
+
# Regexp object. When created from strings, the Citrus notation is any
|
634
|
+
# sequence of characters enclosed in either single or double quotes, e.g.:
|
581
635
|
#
|
582
636
|
# 'expr'
|
583
637
|
# "expr"
|
584
638
|
#
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
def initialize(rule='')
|
589
|
-
raise ArgumentError, "FixedWidth must be a String" unless String === rule
|
590
|
-
super
|
591
|
-
end
|
592
|
-
|
593
|
-
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
|
594
|
-
# no match can be made.
|
595
|
-
def match(input, offset=0)
|
596
|
-
create_match(rule.dup, offset) if input[offset, rule.length] == rule
|
597
|
-
end
|
598
|
-
end
|
599
|
-
|
600
|
-
# An Expression is a Terminal that has the same semantics as a regular
|
601
|
-
# expression in Ruby. The expression must match at the beginning of the input
|
602
|
-
# (index 0). The Citrus notation is identical to Ruby's regular expression
|
603
|
-
# notation, e.g.:
|
639
|
+
# When created from a regular expression, the Citrus notation is identical to
|
640
|
+
# Ruby's regular expression notation, e.g.:
|
604
641
|
#
|
605
642
|
# /expr/
|
606
643
|
#
|
@@ -610,19 +647,34 @@ module Citrus
|
|
610
647
|
# [a-zA-Z]
|
611
648
|
# .
|
612
649
|
#
|
613
|
-
class
|
614
|
-
include
|
650
|
+
class Terminal
|
651
|
+
include Rule
|
615
652
|
|
616
|
-
def initialize(rule
|
617
|
-
|
618
|
-
|
653
|
+
def initialize(rule='')
|
654
|
+
case rule
|
655
|
+
when String
|
656
|
+
@string = rule
|
657
|
+
@rule = Regexp.new(Regexp.escape(rule))
|
658
|
+
when Regexp
|
659
|
+
@rule = rule
|
660
|
+
else
|
661
|
+
raise ArgumentError, "Cannot create terminal from object: %s" %
|
662
|
+
rule.inspect
|
663
|
+
end
|
619
664
|
end
|
620
665
|
|
621
|
-
#
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
666
|
+
# The actual Regexp object this rule uses to match.
|
667
|
+
attr_reader :rule
|
668
|
+
|
669
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
670
|
+
def match(input)
|
671
|
+
m = input.scan(@rule)
|
672
|
+
create_match(m) if m
|
673
|
+
end
|
674
|
+
|
675
|
+
# Returns the Citrus notation of this rule as a string.
|
676
|
+
def to_s
|
677
|
+
(@string || @rule).inspect
|
626
678
|
end
|
627
679
|
end
|
628
680
|
|
@@ -669,10 +721,9 @@ module Citrus
|
|
669
721
|
class AndPredicate
|
670
722
|
include Predicate
|
671
723
|
|
672
|
-
# Returns the Match for this rule on +input+
|
673
|
-
|
674
|
-
|
675
|
-
create_match('', offset) if input.match(rule, offset)
|
724
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
725
|
+
def match(input)
|
726
|
+
create_match('') if input.match(rule)
|
676
727
|
end
|
677
728
|
|
678
729
|
# Returns the Citrus notation of this rule as a string.
|
@@ -690,10 +741,9 @@ module Citrus
|
|
690
741
|
class NotPredicate
|
691
742
|
include Predicate
|
692
743
|
|
693
|
-
# Returns the Match for this rule on +input+
|
694
|
-
|
695
|
-
|
696
|
-
create_match('', offset) unless input.match(rule, offset)
|
744
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
745
|
+
def match(input)
|
746
|
+
create_match('') unless input.match(rule)
|
697
747
|
end
|
698
748
|
|
699
749
|
# Returns the Citrus notation of this rule as a string.
|
@@ -713,19 +763,16 @@ module Citrus
|
|
713
763
|
|
714
764
|
DOT_RULE = Rule.new(DOT)
|
715
765
|
|
716
|
-
# Returns the Match for this rule on +input+
|
717
|
-
|
718
|
-
def match(input, offset=0)
|
766
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
767
|
+
def match(input)
|
719
768
|
matches = []
|
720
|
-
|
721
|
-
|
722
|
-
m = input.match(DOT_RULE, os)
|
769
|
+
while input.match(rule).nil?
|
770
|
+
m = input.match(DOT_RULE)
|
723
771
|
break unless m
|
724
772
|
matches << m
|
725
|
-
os += m.length
|
726
773
|
end
|
727
774
|
# Create a single match from the aggregate text value of all submatches.
|
728
|
-
create_match(matches.join
|
775
|
+
create_match(matches.join) if matches.any?
|
729
776
|
end
|
730
777
|
|
731
778
|
# Returns the Citrus notation of this rule as a string.
|
@@ -757,11 +804,11 @@ module Citrus
|
|
757
804
|
# The label this rule adds to all its matches.
|
758
805
|
attr_reader :label
|
759
806
|
|
760
|
-
# Returns the Match for this rule on +input+
|
761
|
-
#
|
762
|
-
#
|
763
|
-
def match(input
|
764
|
-
m = input.match(rule
|
807
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
808
|
+
# When a Label makes a match, it re-names the match to the value of its
|
809
|
+
# #label.
|
810
|
+
def match(input)
|
811
|
+
m = input.match(rule)
|
765
812
|
extend_match(m, label) if m
|
766
813
|
end
|
767
814
|
|
@@ -799,18 +846,15 @@ module Citrus
|
|
799
846
|
@range = Range.new(min, max)
|
800
847
|
end
|
801
848
|
|
802
|
-
# Returns the Match for this rule on +input+
|
803
|
-
|
804
|
-
def match(input, offset=0)
|
849
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
850
|
+
def match(input)
|
805
851
|
matches = []
|
806
|
-
os = offset
|
807
852
|
while matches.length < @range.end
|
808
|
-
m = input.match(rule
|
853
|
+
m = input.match(rule)
|
809
854
|
break unless m
|
810
855
|
matches << m
|
811
|
-
os += m.length
|
812
856
|
end
|
813
|
-
create_match(matches
|
857
|
+
create_match(matches) if @range.include?(matches.length)
|
814
858
|
end
|
815
859
|
|
816
860
|
# The minimum number of times this rule must match.
|
@@ -846,6 +890,7 @@ module Citrus
|
|
846
890
|
module List
|
847
891
|
include Nonterminal
|
848
892
|
|
893
|
+
# See Rule#paren?.
|
849
894
|
def paren?
|
850
895
|
rules.length > 1
|
851
896
|
end
|
@@ -859,11 +904,10 @@ module Citrus
|
|
859
904
|
class Choice
|
860
905
|
include List
|
861
906
|
|
862
|
-
# Returns the Match for this rule on +input+
|
863
|
-
|
864
|
-
def match(input, offset=0)
|
907
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
908
|
+
def match(input)
|
865
909
|
rules.each do |rule|
|
866
|
-
m = input.match(rule
|
910
|
+
m = input.match(rule)
|
867
911
|
return extend_match(m, name) if m
|
868
912
|
end
|
869
913
|
nil
|
@@ -883,18 +927,15 @@ module Citrus
|
|
883
927
|
class Sequence
|
884
928
|
include List
|
885
929
|
|
886
|
-
# Returns the Match for this rule on +input+
|
887
|
-
|
888
|
-
def match(input, offset=0)
|
930
|
+
# Returns the Match for this rule on +input+, +nil+ if no match can be made.
|
931
|
+
def match(input)
|
889
932
|
matches = []
|
890
|
-
os = offset
|
891
933
|
rules.each do |rule|
|
892
|
-
m = input.match(rule
|
934
|
+
m = input.match(rule)
|
893
935
|
break unless m
|
894
936
|
matches << m
|
895
|
-
os += m.length
|
896
937
|
end
|
897
|
-
create_match(matches
|
938
|
+
create_match(matches) if matches.length == rules.length
|
898
939
|
end
|
899
940
|
|
900
941
|
# Returns the Citrus notation of this rule as a string.
|
@@ -907,24 +948,19 @@ module Citrus
|
|
907
948
|
# match may contain any number of other matches. This class provides several
|
908
949
|
# convenient tree traversal methods that help when examining parse results.
|
909
950
|
class Match < String
|
910
|
-
def initialize(data
|
951
|
+
def initialize(data)
|
911
952
|
case data
|
912
953
|
when String
|
913
954
|
super(data)
|
914
|
-
when MatchData
|
915
|
-
super(data[0])
|
916
|
-
@captures = data.captures
|
917
955
|
when Array
|
918
956
|
super(data.join)
|
919
957
|
@matches = data
|
958
|
+
else
|
959
|
+
raise ArgumentError, "Cannot create match from object: %s" %
|
960
|
+
data.inspect
|
920
961
|
end
|
921
|
-
|
922
|
-
@offset = offset
|
923
962
|
end
|
924
963
|
|
925
|
-
# The offset in the input at which this match occurred.
|
926
|
-
attr_reader :offset
|
927
|
-
|
928
964
|
# An array of all names of this match. A name is added to a match object
|
929
965
|
# for each rule that returns that object when matching. These names can then
|
930
966
|
# be used to determine which rules were satisfied by a given match.
|
@@ -947,12 +983,6 @@ module Citrus
|
|
947
983
|
@matches ||= []
|
948
984
|
end
|
949
985
|
|
950
|
-
# An array of substrings returned by MatchData#captures if this match was
|
951
|
-
# created by an Expression.
|
952
|
-
def captures
|
953
|
-
@captures ||= []
|
954
|
-
end
|
955
|
-
|
956
986
|
# Returns an array of all sub-matches with the given +name+. If +deep+ is
|
957
987
|
# +false+, returns only sub-matches that are immediate descendants of this
|
958
988
|
# match.
|