citrus 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +217 -154
- data/doc/{background.rdoc → background.markdown} +35 -32
- data/doc/example.markdown +145 -0
- data/doc/index.markdown +18 -0
- data/doc/{license.rdoc → license.markdown} +2 -1
- data/doc/links.markdown +13 -0
- data/doc/syntax.markdown +129 -0
- data/examples/calc.citrus +55 -49
- data/examples/calc.rb +55 -49
- data/examples/ip.rb +1 -1
- data/lib/citrus.rb +118 -89
- data/lib/citrus/debug.rb +1 -1
- data/lib/citrus/file.rb +75 -154
- data/test/alias_test.rb +2 -4
- data/test/and_predicate_test.rb +1 -1
- data/test/but_predicate_test.rb +36 -0
- data/test/choice_test.rb +5 -5
- data/test/expression_test.rb +1 -1
- data/test/file_test.rb +17 -15
- data/test/fixed_width_test.rb +2 -2
- data/test/grammar_test.rb +8 -8
- data/test/helper.rb +54 -6
- data/test/label_test.rb +3 -3
- data/test/match_test.rb +5 -5
- data/test/not_predicate_test.rb +1 -1
- data/test/repeat_test.rb +17 -17
- data/test/rule_test.rb +5 -9
- data/test/sequence_test.rb +3 -3
- data/test/super_test.rb +2 -2
- metadata +11 -9
- data/doc/example.rdoc +0 -115
- data/doc/index.rdoc +0 -15
- data/doc/links.rdoc +0 -18
- data/doc/syntax.rdoc +0 -96
data/test/rule_test.rb
CHANGED
@@ -9,19 +9,15 @@ class RuleTest < Test::Unit::TestCase
|
|
9
9
|
end
|
10
10
|
|
11
11
|
NumericProc = Proc.new {
|
12
|
-
def
|
13
|
-
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_f
|
17
|
-
text.to_f
|
12
|
+
def add_one
|
13
|
+
to_i + 1
|
18
14
|
end
|
19
15
|
}
|
20
16
|
|
21
17
|
NumericModule = Module.new(&NumericProc)
|
22
18
|
|
23
19
|
def test_create
|
24
|
-
rule = Rule.
|
20
|
+
rule = Rule.eval('"a"')
|
25
21
|
assert(rule)
|
26
22
|
match = rule.match(input('a'))
|
27
23
|
assert(match)
|
@@ -40,7 +36,7 @@ class RuleTest < Test::Unit::TestCase
|
|
40
36
|
rule.extension = NumericProc
|
41
37
|
match = rule.match(input('1'))
|
42
38
|
assert(match)
|
43
|
-
assert_equal(
|
39
|
+
assert_equal(2, match.add_one)
|
44
40
|
assert_instance_of(Float, match.to_f)
|
45
41
|
end
|
46
42
|
|
@@ -49,7 +45,7 @@ class RuleTest < Test::Unit::TestCase
|
|
49
45
|
rule.extension = NumericModule
|
50
46
|
match = rule.match(input('1'))
|
51
47
|
assert(match)
|
52
|
-
assert_equal(
|
48
|
+
assert_equal(2, match.add_one)
|
53
49
|
assert_instance_of(Float, match.to_f)
|
54
50
|
end
|
55
51
|
|
data/test/sequence_test.rb
CHANGED
@@ -18,7 +18,7 @@ class SequenceTest < Test::Unit::TestCase
|
|
18
18
|
|
19
19
|
match = rule.match(input('ab'))
|
20
20
|
assert(match)
|
21
|
-
assert_equal('ab', match
|
21
|
+
assert_equal('ab', match)
|
22
22
|
assert_equal(2, match.length)
|
23
23
|
end
|
24
24
|
|
@@ -26,7 +26,7 @@ class SequenceTest < Test::Unit::TestCase
|
|
26
26
|
rule = Sequence.new([/\d+/, '+', /\d+/])
|
27
27
|
match = rule.match(input('1+2'))
|
28
28
|
assert(match)
|
29
|
-
assert_equal('1+2', match
|
29
|
+
assert_equal('1+2', match)
|
30
30
|
assert_equal(3, match.length)
|
31
31
|
end
|
32
32
|
|
@@ -34,7 +34,7 @@ class SequenceTest < Test::Unit::TestCase
|
|
34
34
|
rule = Sequence.new([/[0-9]+/, Choice.new(%w<+ ->), /\d+/])
|
35
35
|
match = rule.match(input('1+2'))
|
36
36
|
assert(match)
|
37
|
-
assert_equal('1+2', match
|
37
|
+
assert_equal('1+2', match)
|
38
38
|
assert_equal(3, match.length)
|
39
39
|
end
|
40
40
|
|
data/test/super_test.rb
CHANGED
@@ -20,12 +20,12 @@ class SuperTest < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
match = grammar2.parse('b')
|
22
22
|
assert(match)
|
23
|
-
assert_equal('b', match
|
23
|
+
assert_equal('b', match)
|
24
24
|
assert_equal(1, match.length)
|
25
25
|
|
26
26
|
match = grammar2.parse('a')
|
27
27
|
assert(match)
|
28
|
-
assert_equal('a', match
|
28
|
+
assert_equal('a', match)
|
29
29
|
assert_equal(1, match.length)
|
30
30
|
end
|
31
31
|
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 7
|
7
|
+
- 8
|
8
8
|
- 0
|
9
|
-
version: 1.7.0
|
9
|
+
version: 1.8.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Michael Jackson
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-08-17 00:00:00 -
|
17
|
+
date: 2010-08-17 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -55,12 +55,12 @@ files:
|
|
55
55
|
- benchmark/seqpar.citrus
|
56
56
|
- benchmark/seqpar.gnuplot
|
57
57
|
- benchmark/seqpar.rb
|
58
|
-
- doc/background.rdoc
|
59
|
-
- doc/example.rdoc
|
60
|
-
- doc/index.rdoc
|
61
|
-
- doc/license.rdoc
|
62
|
-
- doc/links.rdoc
|
63
|
-
- doc/syntax.rdoc
|
58
|
+
- doc/background.markdown
|
59
|
+
- doc/example.markdown
|
60
|
+
- doc/index.markdown
|
61
|
+
- doc/license.markdown
|
62
|
+
- doc/links.markdown
|
63
|
+
- doc/syntax.markdown
|
64
64
|
- examples/calc.citrus
|
65
65
|
- examples/calc.rb
|
66
66
|
- examples/ip.citrus
|
@@ -79,6 +79,7 @@ files:
|
|
79
79
|
- test/_files/super.citrus
|
80
80
|
- test/alias_test.rb
|
81
81
|
- test/and_predicate_test.rb
|
82
|
+
- test/but_predicate_test.rb
|
82
83
|
- test/calc_file_test.rb
|
83
84
|
- test/calc_test.rb
|
84
85
|
- test/choice_test.rb
|
@@ -138,6 +139,7 @@ summary: Parsing Expressions for Ruby
|
|
138
139
|
test_files:
|
139
140
|
- test/alias_test.rb
|
140
141
|
- test/and_predicate_test.rb
|
142
|
+
- test/but_predicate_test.rb
|
141
143
|
- test/calc_file_test.rb
|
142
144
|
- test/calc_test.rb
|
143
145
|
- test/choice_test.rb
|
data/doc/example.rdoc
DELETED
@@ -1,115 +0,0 @@
|
|
1
|
-
= Example
|
2
|
-
|
3
|
-
Below is an example of a simple grammar that is able to parse strings of
|
4
|
-
integers separated by any amount of white space and a <tt>+</tt> symbol.
|
5
|
-
|
6
|
-
grammar Addition
|
7
|
-
rule additive
|
8
|
-
number plus (additive | number)
|
9
|
-
end
|
10
|
-
|
11
|
-
rule number
|
12
|
-
[0-9]+ space
|
13
|
-
end
|
14
|
-
|
15
|
-
rule plus
|
16
|
-
'+' space
|
17
|
-
end
|
18
|
-
|
19
|
-
rule space
|
20
|
-
[ \t]*
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
Several things to note about the above example:
|
25
|
-
|
26
|
-
* Grammar and rule declarations end with the <tt>end</tt> keyword
|
27
|
-
* A Sequence of rules is created by separating expressions with a space
|
28
|
-
* Likewise, ordered choice is represented with a vertical bar
|
29
|
-
* Parentheses may be used to override the natural binding order
|
30
|
-
* Rules may refer to other rules in their own definitions simply by using the
|
31
|
-
other rule's name
|
32
|
-
* Any expression may be followed by a quantifier
|
33
|
-
|
34
|
-
== Interpretation
|
35
|
-
|
36
|
-
The grammar above is able to parse simple mathematical expressions such as "1+2"
|
37
|
-
and "1 + 2+3", but it does not have enough semantic information to be able to
|
38
|
-
actually interpret these expressions.
|
39
|
-
|
40
|
-
At this point, when the grammar parses a string it generates a tree of Match[link:api/classes/Citrus/Match.html]
|
41
|
-
objects. Each match is created by a rule. A match will know what text it
|
42
|
-
contains, its offset in the original input, and what submatches it contains.
|
43
|
-
|
44
|
-
Submatches are created whenever a rule contains another rule. For example, in
|
45
|
-
the grammar above the number rule matches a string of digits followed by white
|
46
|
-
space. Thus, a match generated by the number rule will contain two submatches.
|
47
|
-
|
48
|
-
We can use Ruby's block syntax to create a module that will be attached to these
|
49
|
-
matches when they are created and is used to lazily extend them when we want to
|
50
|
-
interpret them. The following example shows one way to do this.
|
51
|
-
|
52
|
-
grammar Addition
|
53
|
-
rule additive
|
54
|
-
(number plus term:(additive | number)) {
|
55
|
-
def value
|
56
|
-
number.value + term.value
|
57
|
-
end
|
58
|
-
}
|
59
|
-
end
|
60
|
-
|
61
|
-
rule number
|
62
|
-
([0-9]+ space) {
|
63
|
-
def value
|
64
|
-
text.strip.to_i
|
65
|
-
end
|
66
|
-
}
|
67
|
-
end
|
68
|
-
|
69
|
-
rule plus
|
70
|
-
'+' space
|
71
|
-
end
|
72
|
-
|
73
|
-
rule space
|
74
|
-
[ \t]*
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
In this version of the grammar we have added two semantic blocks, one each for
|
79
|
-
the +additive+ and +number+ rules. These blocks contain methods that will be present
|
80
|
-
on all match objects that result from matches of those particular rules. It's
|
81
|
-
easiest to explain what is going on here by starting with the lowest level
|
82
|
-
block, which is defined within the +number+ rule.
|
83
|
-
|
84
|
-
The semantic block associated with the +number+ rule defines one method, +value+.
|
85
|
-
Inside this method, we can see that the value of a number match is determined to
|
86
|
-
be its text value, stripped of white space and converted to an integer.
|
87
|
-
|
88
|
-
The +additive+ rule also extends its matches with a +value+ method. Notice the use
|
89
|
-
of the "term" label within the rule definition. This label allows the match that
|
90
|
-
is created by either the +additive+ or the +number+ rule to be retrieved using the
|
91
|
-
"term" label. The value of an additive is determined to be the values of its
|
92
|
-
number and term matches added together using Ruby's addition operator.
|
93
|
-
|
94
|
-
Since additive is the first rule defined in the grammar, any match that results
|
95
|
-
from parsing a string with this grammar will have a value method that can be
|
96
|
-
used to recursively calculate the collective value of the entire match tree.
|
97
|
-
|
98
|
-
To give it a try, save the code for the Addition grammar in a file called
|
99
|
-
addition.citrus. Next, assuming you have the Citrus gem installed, try the
|
100
|
-
following sequence of commands in a terminal.
|
101
|
-
|
102
|
-
$ irb
|
103
|
-
> require 'citrus'
|
104
|
-
=> true
|
105
|
-
> Citrus.load 'addition'
|
106
|
-
=> [Addition]
|
107
|
-
> m = Addition.parse '1 + 2 + 3'
|
108
|
-
=> #<Citrus::Match ...
|
109
|
-
> m.value
|
110
|
-
=> 6
|
111
|
-
|
112
|
-
Congratulations! You just ran your first piece of Citrus code.
|
113
|
-
|
114
|
-
Take a look at examples/calc.citrus[http://github.com/mjijackson/citrus/blob/master/examples/calc.citrus] for an example of a calculator that is able
|
115
|
-
to parse and evaluate more complex mathematical expressions.
|
data/doc/index.rdoc
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
Citrus is a compact and powerful parsing library for Ruby[http://ruby-lang.org/] that combines the
|
2
|
-
elegance and expressiveness of the language with the simplicity and power of
|
3
|
-
parsing expressions.
|
4
|
-
|
5
|
-
= Installation
|
6
|
-
|
7
|
-
Via RubyGems[http://rubygems.org/]:
|
8
|
-
|
9
|
-
$ sudo gem install citrus
|
10
|
-
|
11
|
-
From a local copy:
|
12
|
-
|
13
|
-
$ git clone git://github.com/mjijackson/citrus.git
|
14
|
-
$ cd citrus
|
15
|
-
$ rake package && sudo rake install
|
data/doc/links.rdoc
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
= Links
|
2
|
-
|
3
|
-
The primary resource for all things to do with parsing expressions can be found
|
4
|
-
at MIT.
|
5
|
-
|
6
|
-
http://pdos.csail.mit.edu/~baford/packrat
|
7
|
-
|
8
|
-
A useful summary of parsing expression grammars can be found on Wikipedia as
|
9
|
-
well.
|
10
|
-
|
11
|
-
http://en.wikipedia.org/wiki/Parsing_expression_grammar
|
12
|
-
|
13
|
-
Citrus draws inspiration from another Ruby library for writing parsing
|
14
|
-
expression grammars, Treetop. While Citrus' syntax is similar to that of
|
15
|
-
Treetop, it's not identical. The link is included here for those who may wish to
|
16
|
-
explore an alternative implementation.
|
17
|
-
|
18
|
-
http://treetop.rubyforge.org
|
data/doc/syntax.rdoc
DELETED
@@ -1,96 +0,0 @@
|
|
1
|
-
= Syntax
|
2
|
-
|
3
|
-
The most straightforward way to compose a Citrus grammar is to use Citrus' own
|
4
|
-
custom grammar syntax. This syntax borrows heavily from Ruby, so it should
|
5
|
-
already be familiar to Ruby programmers.
|
6
|
-
|
7
|
-
== Terminals
|
8
|
-
|
9
|
-
Terminals may be represented by a string or a regular expression. Both follow
|
10
|
-
the same rules as Ruby string and regular expression literals.
|
11
|
-
|
12
|
-
'abc'
|
13
|
-
"abc\n"
|
14
|
-
/\xFF/
|
15
|
-
|
16
|
-
Character classes and the dot (match anything) symbol are supported as well for
|
17
|
-
compatibility with other parsing expression implementations.
|
18
|
-
|
19
|
-
[a-z0-9] # match any lowercase letter or digit
|
20
|
-
[\x00-\xFF] # match any octet
|
21
|
-
. # match anything, even new lines
|
22
|
-
|
23
|
-
See FixedWidth[link:api/classes/Citrus/FixedWidth.html] and
|
24
|
-
Expression[link:api/classes/Citrus/Expression.html] for more information.
|
25
|
-
|
26
|
-
== Repetition
|
27
|
-
|
28
|
-
Quantifiers may be used after any expression to specify a number of times it
|
29
|
-
must match. The universal form of a quantifier is N*M where N is the minimum and
|
30
|
-
M is the maximum number of times the expression may match.
|
31
|
-
|
32
|
-
'abc'1*2 # match "abc" a minimum of one, maximum
|
33
|
-
# of two times
|
34
|
-
'abc'1* # match "abc" at least once
|
35
|
-
'abc'*2 # match "abc" a maximum of twice
|
36
|
-
|
37
|
-
The + and ? operators are supported as well for the common cases of 1* and *1
|
38
|
-
respectively.
|
39
|
-
|
40
|
-
'abc'+ # match "abc" at least once
|
41
|
-
'abc'? # match "abc" a maximum of once
|
42
|
-
|
43
|
-
See Repeat[link:api/classes/Citrus/Repeat.html] for more information.
|
44
|
-
|
45
|
-
== Lookahead
|
46
|
-
|
47
|
-
Both positive and negative lookahead are supported in Citrus. Use the & and !
|
48
|
-
operators to indicate that an expression either should or should not match. In
|
49
|
-
neither case is any input consumed.
|
50
|
-
|
51
|
-
&'a' 'b' # match a "b" preceded by an "a"
|
52
|
-
!'a' 'b' # match a "b" that is not preceded by an "a"
|
53
|
-
!'a' . # match any character except for "a"
|
54
|
-
|
55
|
-
See AndPredicate[link:api/classes/Citrus/AndPredicate.html] and
|
56
|
-
NotPredicate[link:api/classes/Citrus/NotPredicate.html] for more information.
|
57
|
-
|
58
|
-
== Sequences
|
59
|
-
|
60
|
-
Sequences of expressions may be separated by a space to indicate that the rules
|
61
|
-
should match in that order.
|
62
|
-
|
63
|
-
'a' 'b' 'c' # match "a", then "b", then "c"
|
64
|
-
'a' [0-9] # match "a", then a numeric digit
|
65
|
-
|
66
|
-
See Sequence[link:api/classes/Citrus/Sequence.html] for more information.
|
67
|
-
|
68
|
-
== Choices
|
69
|
-
|
70
|
-
Ordered choice is indicated by a vertical bar that separates two expressions.
|
71
|
-
Note that any operator binds more tightly than the bar.
|
72
|
-
|
73
|
-
'a' | 'b' # match "a" or "b"
|
74
|
-
'a' 'b' | 'c' # match "a" then "b" (in sequence), or "c"
|
75
|
-
|
76
|
-
See Choice[link:api/classes/Citrus/Choice.html] for more information.
|
77
|
-
|
78
|
-
== Super
|
79
|
-
|
80
|
-
When including a grammar inside another, all rules in the child that have the
|
81
|
-
same name as a rule in the parent also have access to the super keyword to
|
82
|
-
invoke the parent rule.
|
83
|
-
|
84
|
-
See Super[link:api/classes/Citrus/Super.html] for more information.
|
85
|
-
|
86
|
-
== Labels
|
87
|
-
|
88
|
-
Match objects may be referred to by a different name than the rule that
|
89
|
-
originally generated them. Labels are created by placing the label and a colon
|
90
|
-
immediately preceding any expression.
|
91
|
-
|
92
|
-
chars:/[a-z]+/ # the characters matched by the regular
|
93
|
-
# expression may be referred to as "chars"
|
94
|
-
# in a block method
|
95
|
-
|
96
|
-
See Label[link:api/classes/Citrus/Label.html] for more information.
|