citrus 1.7.0 → 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +217 -154
- data/doc/{background.rdoc → background.markdown} +35 -32
- data/doc/example.markdown +145 -0
- data/doc/index.markdown +18 -0
- data/doc/{license.rdoc → license.markdown} +2 -1
- data/doc/links.markdown +13 -0
- data/doc/syntax.markdown +129 -0
- data/examples/calc.citrus +55 -49
- data/examples/calc.rb +55 -49
- data/examples/ip.rb +1 -1
- data/lib/citrus.rb +118 -89
- data/lib/citrus/debug.rb +1 -1
- data/lib/citrus/file.rb +75 -154
- data/test/alias_test.rb +2 -4
- data/test/and_predicate_test.rb +1 -1
- data/test/but_predicate_test.rb +36 -0
- data/test/choice_test.rb +5 -5
- data/test/expression_test.rb +1 -1
- data/test/file_test.rb +17 -15
- data/test/fixed_width_test.rb +2 -2
- data/test/grammar_test.rb +8 -8
- data/test/helper.rb +54 -6
- data/test/label_test.rb +3 -3
- data/test/match_test.rb +5 -5
- data/test/not_predicate_test.rb +1 -1
- data/test/repeat_test.rb +17 -17
- data/test/rule_test.rb +5 -9
- data/test/sequence_test.rb +3 -3
- data/test/super_test.rb +2 -2
- metadata +11 -9
- data/doc/example.rdoc +0 -115
- data/doc/index.rdoc +0 -15
- data/doc/links.rdoc +0 -18
- data/doc/syntax.rdoc +0 -96
data/test/rule_test.rb
CHANGED
@@ -9,19 +9,15 @@ class RuleTest < Test::Unit::TestCase
|
|
9
9
|
end
|
10
10
|
|
11
11
|
NumericProc = Proc.new {
|
12
|
-
def
|
13
|
-
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_f
|
17
|
-
text.to_f
|
12
|
+
def add_one
|
13
|
+
to_i + 1
|
18
14
|
end
|
19
15
|
}
|
20
16
|
|
21
17
|
NumericModule = Module.new(&NumericProc)
|
22
18
|
|
23
19
|
def test_create
|
24
|
-
rule = Rule.
|
20
|
+
rule = Rule.eval('"a"')
|
25
21
|
assert(rule)
|
26
22
|
match = rule.match(input('a'))
|
27
23
|
assert(match)
|
@@ -40,7 +36,7 @@ class RuleTest < Test::Unit::TestCase
|
|
40
36
|
rule.extension = NumericProc
|
41
37
|
match = rule.match(input('1'))
|
42
38
|
assert(match)
|
43
|
-
assert_equal(
|
39
|
+
assert_equal(2, match.add_one)
|
44
40
|
assert_instance_of(Float, match.to_f)
|
45
41
|
end
|
46
42
|
|
@@ -49,7 +45,7 @@ class RuleTest < Test::Unit::TestCase
|
|
49
45
|
rule.extension = NumericModule
|
50
46
|
match = rule.match(input('1'))
|
51
47
|
assert(match)
|
52
|
-
assert_equal(
|
48
|
+
assert_equal(2, match.add_one)
|
53
49
|
assert_instance_of(Float, match.to_f)
|
54
50
|
end
|
55
51
|
|
data/test/sequence_test.rb
CHANGED
@@ -18,7 +18,7 @@ class SequenceTest < Test::Unit::TestCase
|
|
18
18
|
|
19
19
|
match = rule.match(input('ab'))
|
20
20
|
assert(match)
|
21
|
-
assert_equal('ab', match
|
21
|
+
assert_equal('ab', match)
|
22
22
|
assert_equal(2, match.length)
|
23
23
|
end
|
24
24
|
|
@@ -26,7 +26,7 @@ class SequenceTest < Test::Unit::TestCase
|
|
26
26
|
rule = Sequence.new([/\d+/, '+', /\d+/])
|
27
27
|
match = rule.match(input('1+2'))
|
28
28
|
assert(match)
|
29
|
-
assert_equal('1+2', match
|
29
|
+
assert_equal('1+2', match)
|
30
30
|
assert_equal(3, match.length)
|
31
31
|
end
|
32
32
|
|
@@ -34,7 +34,7 @@ class SequenceTest < Test::Unit::TestCase
|
|
34
34
|
rule = Sequence.new([/[0-9]+/, Choice.new(%w<+ ->), /\d+/])
|
35
35
|
match = rule.match(input('1+2'))
|
36
36
|
assert(match)
|
37
|
-
assert_equal('1+2', match
|
37
|
+
assert_equal('1+2', match)
|
38
38
|
assert_equal(3, match.length)
|
39
39
|
end
|
40
40
|
|
data/test/super_test.rb
CHANGED
@@ -20,12 +20,12 @@ class SuperTest < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
match = grammar2.parse('b')
|
22
22
|
assert(match)
|
23
|
-
assert_equal('b', match
|
23
|
+
assert_equal('b', match)
|
24
24
|
assert_equal(1, match.length)
|
25
25
|
|
26
26
|
match = grammar2.parse('a')
|
27
27
|
assert(match)
|
28
|
-
assert_equal('a', match
|
28
|
+
assert_equal('a', match)
|
29
29
|
assert_equal(1, match.length)
|
30
30
|
end
|
31
31
|
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 7
|
7
|
+
- 8
|
8
8
|
- 0
|
9
|
-
version: 1.7.0
|
9
|
+
version: 1.8.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Michael Jackson
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-08-17 00:00:00 -
|
17
|
+
date: 2010-08-17 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -55,12 +55,12 @@ files:
|
|
55
55
|
- benchmark/seqpar.citrus
|
56
56
|
- benchmark/seqpar.gnuplot
|
57
57
|
- benchmark/seqpar.rb
|
58
|
-
- doc/background.rdoc
|
59
|
-
- doc/example.rdoc
|
60
|
-
- doc/index.rdoc
|
61
|
-
- doc/license.rdoc
|
62
|
-
- doc/links.rdoc
|
63
|
-
- doc/syntax.rdoc
|
58
|
+
- doc/background.markdown
|
59
|
+
- doc/example.markdown
|
60
|
+
- doc/index.markdown
|
61
|
+
- doc/license.markdown
|
62
|
+
- doc/links.markdown
|
63
|
+
- doc/syntax.markdown
|
64
64
|
- examples/calc.citrus
|
65
65
|
- examples/calc.rb
|
66
66
|
- examples/ip.citrus
|
@@ -79,6 +79,7 @@ files:
|
|
79
79
|
- test/_files/super.citrus
|
80
80
|
- test/alias_test.rb
|
81
81
|
- test/and_predicate_test.rb
|
82
|
+
- test/but_predicate_test.rb
|
82
83
|
- test/calc_file_test.rb
|
83
84
|
- test/calc_test.rb
|
84
85
|
- test/choice_test.rb
|
@@ -138,6 +139,7 @@ summary: Parsing Expressions for Ruby
|
|
138
139
|
test_files:
|
139
140
|
- test/alias_test.rb
|
140
141
|
- test/and_predicate_test.rb
|
142
|
+
- test/but_predicate_test.rb
|
141
143
|
- test/calc_file_test.rb
|
142
144
|
- test/calc_test.rb
|
143
145
|
- test/choice_test.rb
|
data/doc/example.rdoc
DELETED
@@ -1,115 +0,0 @@
|
|
1
|
-
= Example
|
2
|
-
|
3
|
-
Below is an example of a simple grammar that is able to parse strings of
|
4
|
-
integers separated by any amount of white space and a <tt>+</tt> symbol.
|
5
|
-
|
6
|
-
grammar Addition
|
7
|
-
rule additive
|
8
|
-
number plus (additive | number)
|
9
|
-
end
|
10
|
-
|
11
|
-
rule number
|
12
|
-
[0-9]+ space
|
13
|
-
end
|
14
|
-
|
15
|
-
rule plus
|
16
|
-
'+' space
|
17
|
-
end
|
18
|
-
|
19
|
-
rule space
|
20
|
-
[ \t]*
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
Several things to note about the above example:
|
25
|
-
|
26
|
-
* Grammar and rule declarations end with the <tt>end</tt> keyword
|
27
|
-
* A Sequence of rules is created by separating expressions with a space
|
28
|
-
* Likewise, ordered choice is represented with a vertical bar
|
29
|
-
* Parentheses may be used to override the natural binding order
|
30
|
-
* Rules may refer to other rules in their own definitions simply by using the
|
31
|
-
other rule's name
|
32
|
-
* Any expression may be followed by a quantifier
|
33
|
-
|
34
|
-
== Interpretation
|
35
|
-
|
36
|
-
The grammar above is able to parse simple mathematical expressions such as "1+2"
|
37
|
-
and "1 + 2+3", but it does not have enough semantic information to be able to
|
38
|
-
actually interpret these expressions.
|
39
|
-
|
40
|
-
At this point, when the grammar parses a string it generates a tree of Match[link:api/classes/Citrus/Match.html]
|
41
|
-
objects. Each match is created by a rule. A match will know what text it
|
42
|
-
contains, its offset in the original input, and what submatches it contains.
|
43
|
-
|
44
|
-
Submatches are created whenever a rule contains another rule. For example, in
|
45
|
-
the grammar above the number rule matches a string of digits followed by white
|
46
|
-
space. Thus, a match generated by the number rule will contain two submatches.
|
47
|
-
|
48
|
-
We can use Ruby's block syntax to create a module that will be attached to these
|
49
|
-
matches when they are created and is used to lazily extend them when we want to
|
50
|
-
interpret them. The following example shows one way to do this.
|
51
|
-
|
52
|
-
grammar Addition
|
53
|
-
rule additive
|
54
|
-
(number plus term:(additive | number)) {
|
55
|
-
def value
|
56
|
-
number.value + term.value
|
57
|
-
end
|
58
|
-
}
|
59
|
-
end
|
60
|
-
|
61
|
-
rule number
|
62
|
-
([0-9]+ space) {
|
63
|
-
def value
|
64
|
-
text.strip.to_i
|
65
|
-
end
|
66
|
-
}
|
67
|
-
end
|
68
|
-
|
69
|
-
rule plus
|
70
|
-
'+' space
|
71
|
-
end
|
72
|
-
|
73
|
-
rule space
|
74
|
-
[ \t]*
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
In this version of the grammar we have added two semantic blocks, one each for
|
79
|
-
the +additive+ and +number+ rules. These blocks contain methods that will be present
|
80
|
-
on all match objects that result from matches of those particular rules. It's
|
81
|
-
easiest to explain what is going on here by starting with the lowest level
|
82
|
-
block, which is defined within the +number+ rule.
|
83
|
-
|
84
|
-
The semantic block associated with the +number+ rule defines one method, +value+.
|
85
|
-
Inside this method, we can see that the value of a number match is determined to
|
86
|
-
be its text value, stripped of white space and converted to an integer.
|
87
|
-
|
88
|
-
The +additive+ rule also extends its matches with a +value+ method. Notice the use
|
89
|
-
of the "term" label within the rule definition. This label allows the match that
|
90
|
-
is created by either the +additive+ or the +number+ rule to be retrieved using the
|
91
|
-
"term" label. The value of an additive is determined to be the values of its
|
92
|
-
number and term matches added together using Ruby's addition operator.
|
93
|
-
|
94
|
-
Since additive is the first rule defined in the grammar, any match that results
|
95
|
-
from parsing a string with this grammar will have a value method that can be
|
96
|
-
used to recursively calculate the collective value of the entire match tree.
|
97
|
-
|
98
|
-
To give it a try, save the code for the Addition grammar in a file called
|
99
|
-
addition.citrus. Next, assuming you have the Citrus gem installed, try the
|
100
|
-
following sequence of commands in a terminal.
|
101
|
-
|
102
|
-
$ irb
|
103
|
-
> require 'citrus'
|
104
|
-
=> true
|
105
|
-
> Citrus.load 'addition'
|
106
|
-
=> [Addition]
|
107
|
-
> m = Addition.parse '1 + 2 + 3'
|
108
|
-
=> #<Citrus::Match ...
|
109
|
-
> m.value
|
110
|
-
=> 6
|
111
|
-
|
112
|
-
Congratulations! You just ran your first piece of Citrus code.
|
113
|
-
|
114
|
-
Take a look at examples/calc.citrus[http://github.com/mjijackson/citrus/blob/master/examples/calc.citrus] for an example of a calculator that is able
|
115
|
-
to parse and evaluate more complex mathematical expressions.
|
data/doc/index.rdoc
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
Citrus is a compact and powerful parsing library for Ruby[http://ruby-lang.org/] that combines the
|
2
|
-
elegance and expressiveness of the language with the simplicity and power of
|
3
|
-
parsing expressions.
|
4
|
-
|
5
|
-
= Installation
|
6
|
-
|
7
|
-
Via RubyGems[http://rubygems.org/]:
|
8
|
-
|
9
|
-
$ sudo gem install citrus
|
10
|
-
|
11
|
-
From a local copy:
|
12
|
-
|
13
|
-
$ git clone git://github.com/mjijackson/citrus.git
|
14
|
-
$ cd citrus
|
15
|
-
$ rake package && sudo rake install
|
data/doc/links.rdoc
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
= Links
|
2
|
-
|
3
|
-
The primary resource for all things to do with parsing expressions can be found
|
4
|
-
at MIT.
|
5
|
-
|
6
|
-
http://pdos.csail.mit.edu/~baford/packrat
|
7
|
-
|
8
|
-
A useful summary of parsing expression grammars can be found on Wikipedia as
|
9
|
-
well.
|
10
|
-
|
11
|
-
http://en.wikipedia.org/wiki/Parsing_expression_grammar
|
12
|
-
|
13
|
-
Citrus draws inspiration from another Ruby library for writing parsing
|
14
|
-
expression grammars, Treetop. While Citrus' syntax is similar to that of
|
15
|
-
Treetop, it's not identical. The link is included here for those who may wish to
|
16
|
-
explore an alternative implementation.
|
17
|
-
|
18
|
-
http://treetop.rubyforge.org
|
data/doc/syntax.rdoc
DELETED
@@ -1,96 +0,0 @@
|
|
1
|
-
= Syntax
|
2
|
-
|
3
|
-
The most straightforward way to compose a Citrus grammar is to use Citrus' own
|
4
|
-
custom grammar syntax. This syntax borrows heavily from Ruby, so it should
|
5
|
-
already be familiar to Ruby programmers.
|
6
|
-
|
7
|
-
== Terminals
|
8
|
-
|
9
|
-
Terminals may be represented by a string or a regular expression. Both follow
|
10
|
-
the same rules as Ruby string and regular expression literals.
|
11
|
-
|
12
|
-
'abc'
|
13
|
-
"abc\n"
|
14
|
-
/\xFF/
|
15
|
-
|
16
|
-
Character classes and the dot (match anything) symbol are supported as well for
|
17
|
-
compatibility with other parsing expression implementations.
|
18
|
-
|
19
|
-
[a-z0-9] # match any lowercase letter or digit
|
20
|
-
[\x00-\xFF] # match any octet
|
21
|
-
. # match anything, even new lines
|
22
|
-
|
23
|
-
See FixedWidth[link:api/classes/Citrus/FixedWidth.html] and
|
24
|
-
Expression[link:api/classes/Citrus/Expression.html] for more information.
|
25
|
-
|
26
|
-
== Repetition
|
27
|
-
|
28
|
-
Quantifiers may be used after any expression to specify a number of times it
|
29
|
-
must match. The universal form of a quantifier is N*M where N is the minimum and
|
30
|
-
M is the maximum number of times the expression may match.
|
31
|
-
|
32
|
-
'abc'1*2 # match "abc" a minimum of one, maximum
|
33
|
-
# of two times
|
34
|
-
'abc'1* # match "abc" at least once
|
35
|
-
'abc'*2 # match "abc" a maximum of twice
|
36
|
-
|
37
|
-
The + and ? operators are supported as well for the common cases of 1* and *1
|
38
|
-
respectively.
|
39
|
-
|
40
|
-
'abc'+ # match "abc" at least once
|
41
|
-
'abc'? # match "abc" a maximum of once
|
42
|
-
|
43
|
-
See Repeat[link:api/classes/Citrus/Repeat.html] for more information.
|
44
|
-
|
45
|
-
== Lookahead
|
46
|
-
|
47
|
-
Both positive and negative lookahead are supported in Citrus. Use the & and !
|
48
|
-
operators to indicate that an expression either should or should not match. In
|
49
|
-
neither case is any input consumed.
|
50
|
-
|
51
|
-
&'a' 'b' # match a "b" preceded by an "a"
|
52
|
-
!'a' 'b' # match a "b" that is not preceded by an "a"
|
53
|
-
!'a' . # match any character except for "a"
|
54
|
-
|
55
|
-
See AndPredicate[link:api/classes/Citrus/AndPredicate.html] and
|
56
|
-
NotPredicate[link:api/classes/Citrus/NotPredicate.html] for more information.
|
57
|
-
|
58
|
-
== Sequences
|
59
|
-
|
60
|
-
Sequences of expressions may be separated by a space to indicate that the rules
|
61
|
-
should match in that order.
|
62
|
-
|
63
|
-
'a' 'b' 'c' # match "a", then "b", then "c"
|
64
|
-
'a' [0-9] # match "a", then a numeric digit
|
65
|
-
|
66
|
-
See Sequence[link:api/classes/Citrus/Sequence.html] for more information.
|
67
|
-
|
68
|
-
== Choices
|
69
|
-
|
70
|
-
Ordered choice is indicated by a vertical bar that separates two expressions.
|
71
|
-
Note that any operator binds more tightly than the bar.
|
72
|
-
|
73
|
-
'a' | 'b' # match "a" or "b"
|
74
|
-
'a' 'b' | 'c' # match "a" then "b" (in sequence), or "c"
|
75
|
-
|
76
|
-
See Choice[link:api/classes/Citrus/Choice.html] for more information.
|
77
|
-
|
78
|
-
== Super
|
79
|
-
|
80
|
-
When including a grammar inside another, all rules in the child that have the
|
81
|
-
same name as a rule in the parent also have access to the super keyword to
|
82
|
-
invoke the parent rule.
|
83
|
-
|
84
|
-
See Super[link:api/classes/Citrus/Super.html] for more information.
|
85
|
-
|
86
|
-
== Labels
|
87
|
-
|
88
|
-
Match objects may be referred to by a different name than the rule that
|
89
|
-
originally generated them. Labels are created by placing the label and a colon
|
90
|
-
immediately preceding any expression.
|
91
|
-
|
92
|
-
chars:/[a-z]+/ # the characters matched by the regular
|
93
|
-
# expression may be referred to as "chars"
|
94
|
-
# in a block method
|
95
|
-
|
96
|
-
See Label[link:api/classes/Citrus/Label.html] for more information.
|