citrus 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +217 -154
- data/doc/{background.rdoc → background.markdown} +35 -32
- data/doc/example.markdown +145 -0
- data/doc/index.markdown +18 -0
- data/doc/{license.rdoc → license.markdown} +2 -1
- data/doc/links.markdown +13 -0
- data/doc/syntax.markdown +129 -0
- data/examples/calc.citrus +55 -49
- data/examples/calc.rb +55 -49
- data/examples/ip.rb +1 -1
- data/lib/citrus.rb +118 -89
- data/lib/citrus/debug.rb +1 -1
- data/lib/citrus/file.rb +75 -154
- data/test/alias_test.rb +2 -4
- data/test/and_predicate_test.rb +1 -1
- data/test/but_predicate_test.rb +36 -0
- data/test/choice_test.rb +5 -5
- data/test/expression_test.rb +1 -1
- data/test/file_test.rb +17 -15
- data/test/fixed_width_test.rb +2 -2
- data/test/grammar_test.rb +8 -8
- data/test/helper.rb +54 -6
- data/test/label_test.rb +3 -3
- data/test/match_test.rb +5 -5
- data/test/not_predicate_test.rb +1 -1
- data/test/repeat_test.rb +17 -17
- data/test/rule_test.rb +5 -9
- data/test/sequence_test.rb +3 -3
- data/test/super_test.rb +2 -2
- metadata +11 -9
- data/doc/example.rdoc +0 -115
- data/doc/index.rdoc +0 -15
- data/doc/links.rdoc +0 -18
- data/doc/syntax.rdoc +0 -96
data/examples/calc.rb
CHANGED
@@ -1,97 +1,103 @@
|
|
1
1
|
require 'citrus'
|
2
2
|
|
3
|
-
# A grammar for mathematical formulas that apply
|
4
|
-
#
|
5
|
-
# ignoring whitespace.
|
3
|
+
# A grammar for mathematical formulas that apply basic mathematical operations
|
4
|
+
# to all numbers, respecting operator precedence and grouping of expressions
|
5
|
+
# while ignoring whitespace.
|
6
6
|
#
|
7
7
|
# An identical grammar that is written using Citrus' own grammar syntax can be
|
8
8
|
# found in calc.citrus.
|
9
9
|
grammar :Calc do
|
10
|
+
|
11
|
+
## Hierarchy
|
12
|
+
|
10
13
|
rule :term do
|
11
14
|
any(:additive, :factor)
|
12
15
|
end
|
13
16
|
|
14
17
|
rule :additive do
|
15
|
-
all(:factor,
|
16
|
-
|
17
|
-
operator.apply(factor, term)
|
18
|
-
end
|
18
|
+
all(:factor, :additive_operator, :term) {
|
19
|
+
additive_operator.value(factor.value, term.value)
|
19
20
|
}
|
20
21
|
end
|
21
22
|
|
22
23
|
rule :factor do
|
23
|
-
any(:multiplicative, :
|
24
|
+
any(:multiplicative, :prefix)
|
24
25
|
end
|
25
26
|
|
26
27
|
rule :multiplicative do
|
27
|
-
all(:
|
28
|
-
|
29
|
-
operator.apply(primary, factor)
|
30
|
-
end
|
28
|
+
all(:prefix, :multiplicative_operator, :factor) {
|
29
|
+
multiplicative_operator.value(prefix.value, factor.value)
|
31
30
|
}
|
32
31
|
end
|
33
32
|
|
34
|
-
rule :
|
35
|
-
any(:
|
33
|
+
rule :prefix do
|
34
|
+
any(:prefixed, :exponent)
|
36
35
|
end
|
37
36
|
|
38
|
-
rule :
|
39
|
-
all(:
|
40
|
-
|
41
|
-
term.value
|
42
|
-
end
|
37
|
+
rule :prefixed do
|
38
|
+
all(:unary_operator, :prefix) {
|
39
|
+
unary_operator.value(prefix.value)
|
43
40
|
}
|
44
41
|
end
|
45
42
|
|
43
|
+
rule :exponent do
|
44
|
+
any(:exponential, :primary)
|
45
|
+
end
|
46
|
+
|
47
|
+
rule :exponential do
|
48
|
+
all(:primary, :exponential_operator, :prefix) {
|
49
|
+
exponential_operator.value(primary.value, prefix.value)
|
50
|
+
}
|
51
|
+
end
|
52
|
+
|
53
|
+
rule :primary do
|
54
|
+
any(:group, :number)
|
55
|
+
end
|
56
|
+
|
57
|
+
rule :group do
|
58
|
+
all(:lparen, :term, :rparen) { term.value }
|
59
|
+
end
|
60
|
+
|
61
|
+
## Syntax
|
62
|
+
|
46
63
|
rule :number do
|
47
64
|
any(:float, :integer)
|
48
65
|
end
|
49
66
|
|
50
67
|
rule :float do
|
51
|
-
all(
|
52
|
-
def value
|
53
|
-
text.strip.to_f
|
54
|
-
end
|
55
|
-
}
|
68
|
+
all(:digits, '.', :digits, :space) { strip.to_f }
|
56
69
|
end
|
57
70
|
|
58
71
|
rule :integer do
|
59
|
-
all(
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
72
|
+
all(:digits, :space) { strip.to_i }
|
73
|
+
end
|
74
|
+
|
75
|
+
rule :digits do
|
76
|
+
/[0-9]+(?:_[0-9]+)*/
|
64
77
|
end
|
65
78
|
|
66
|
-
rule :
|
67
|
-
all('+', :space) {
|
68
|
-
|
69
|
-
factor.value + term.value
|
70
|
-
end
|
79
|
+
rule :additive_operator do
|
80
|
+
all(any('+', '-'), :space) { |a, b|
|
81
|
+
a.send(strip, b)
|
71
82
|
}
|
72
83
|
end
|
73
84
|
|
74
|
-
rule :
|
75
|
-
all('
|
76
|
-
|
77
|
-
factor.value - term.value
|
78
|
-
end
|
85
|
+
rule :multiplicative_operator do
|
86
|
+
all(any('*', '/', '%'), :space) { |a, b|
|
87
|
+
a.send(strip, b)
|
79
88
|
}
|
80
89
|
end
|
81
90
|
|
82
|
-
rule :
|
83
|
-
all('
|
84
|
-
|
85
|
-
primary.value * factor.value
|
86
|
-
end
|
91
|
+
rule :exponential_operator do
|
92
|
+
all('**', :space) { |a, b|
|
93
|
+
a ** b
|
87
94
|
}
|
88
95
|
end
|
89
96
|
|
90
|
-
rule :
|
91
|
-
all('
|
92
|
-
|
93
|
-
|
94
|
-
end
|
97
|
+
rule :unary_operator do
|
98
|
+
all(any('~', '+', '-'), :space) { |n|
|
99
|
+
# Unary + and - require an @.
|
100
|
+
n.send(strip == '~' ? strip : '%s@' % strip)
|
95
101
|
}
|
96
102
|
end
|
97
103
|
|
data/examples/ip.rb
CHANGED
@@ -7,7 +7,7 @@ require 'citrus'
|
|
7
7
|
|
8
8
|
# Load and evaluate the grammars contained in ip.citrus into the global
|
9
9
|
# namespace.
|
10
|
-
Citrus.load(File.expand_path('../ip
|
10
|
+
Citrus.load(File.expand_path('../ip', __FILE__))
|
11
11
|
|
12
12
|
if $0 == __FILE__
|
13
13
|
require 'test/unit'
|
data/lib/citrus.rb
CHANGED
@@ -4,17 +4,20 @@
|
|
4
4
|
#
|
5
5
|
# http://mjijackson.com/citrus
|
6
6
|
module Citrus
|
7
|
-
VERSION = [1, 7, 0]
|
8
|
-
|
9
|
-
Infinity = 1.0 / 0
|
10
|
-
|
11
7
|
autoload :File, 'citrus/file'
|
12
8
|
|
9
|
+
VERSION = [1, 8, 0]
|
10
|
+
|
13
11
|
# Returns the current version of Citrus as a string.
|
14
12
|
def self.version
|
15
13
|
VERSION.join('.')
|
16
14
|
end
|
17
15
|
|
16
|
+
# A pattern to match any character, including \n.
|
17
|
+
DOT = /./m
|
18
|
+
|
19
|
+
Infinity = 1.0 / 0
|
20
|
+
|
18
21
|
F = ::File
|
19
22
|
|
20
23
|
# Loads the grammar from the given +file+ into the global scope using #eval.
|
@@ -26,7 +29,8 @@ module Citrus
|
|
26
29
|
end
|
27
30
|
|
28
31
|
# Evaluates the given Citrus parsing expression grammar +code+ in the global
|
29
|
-
# scope.
|
32
|
+
# scope. The +code+ may contain the definition of any number of modules.
|
33
|
+
# Returns an array of any grammar modules that are created.
|
30
34
|
def self.eval(code)
|
31
35
|
File.parse(code).value
|
32
36
|
end
|
@@ -177,6 +181,12 @@ module Citrus
|
|
177
181
|
@root || rule_names.first
|
178
182
|
end
|
179
183
|
|
184
|
+
# Creates a new rule that will match any single character. A block may be
|
185
|
+
# provided to specify semantic behavior (via #ext).
|
186
|
+
def dot(&block)
|
187
|
+
ext(Rule.new(DOT), block)
|
188
|
+
end
|
189
|
+
|
180
190
|
# Creates a new Super for the rule currently being defined in the grammar. A
|
181
191
|
# block may be provided to specify semantic behavior (via #ext).
|
182
192
|
def sup(&block)
|
@@ -195,17 +205,25 @@ module Citrus
|
|
195
205
|
ext(NotPredicate.new(rule), block)
|
196
206
|
end
|
197
207
|
|
208
|
+
# Creates a new ButPredicate using the given +rule+. A block may be provided
|
209
|
+
# to specify semantic behavior (via #ext).
|
210
|
+
def but(rule, &block)
|
211
|
+
ext(ButPredicate.new(rule), block)
|
212
|
+
end
|
213
|
+
|
214
|
+
alias butp but # For consistency with #andp and #notp.
|
215
|
+
|
198
216
|
# Creates a new Label using the given +rule+ and +label+. A block may be
|
199
217
|
# provided to specify semantic behavior (via #ext).
|
200
218
|
def label(rule, label, &block)
|
201
|
-
ext(Label.new(
|
219
|
+
ext(Label.new(rule, label), block)
|
202
220
|
end
|
203
221
|
|
204
222
|
# Creates a new Repeat using the given +rule+. +min+ and +max+ specify the
|
205
223
|
# minimum and maximum number of times the rule must match. A block may be
|
206
224
|
# provided to specify semantic behavior (via #ext).
|
207
225
|
def rep(rule, min=1, max=Infinity, &block)
|
208
|
-
ext(Repeat.new(min, max
|
226
|
+
ext(Repeat.new(rule, min, max), block)
|
209
227
|
end
|
210
228
|
|
211
229
|
# An alias for #rep.
|
@@ -256,7 +274,13 @@ module Citrus
|
|
256
274
|
raise 'No rule named "%s"' % root unless root_rule
|
257
275
|
|
258
276
|
input = Input.new(string, opts[:memoize])
|
259
|
-
input.match(root_rule, opts[:offset])
|
277
|
+
match = input.match(root_rule, opts[:offset])
|
278
|
+
|
279
|
+
if match.nil? || (opts[:consume] && input.length != match.length)
|
280
|
+
raise ParseError.new(input)
|
281
|
+
end
|
282
|
+
|
283
|
+
match
|
260
284
|
end
|
261
285
|
|
262
286
|
# The default set of options that is used in #parse. The options hash may
|
@@ -270,10 +294,14 @@ module Citrus
|
|
270
294
|
# guarantees parsers will operate in linear time but costs
|
271
295
|
# significantly more in terms of time and memory required.
|
272
296
|
# Defaults to +false+.
|
297
|
+
# consume:: If this is +true+ a ParseError will be raised during a parse
|
298
|
+
# unless the entire input string is consumed. Defaults to
|
299
|
+
# +false+.
|
273
300
|
def default_parse_options
|
274
301
|
{ :offset => 0,
|
275
302
|
:root => root,
|
276
|
-
:memoize => false
|
303
|
+
:memoize => false,
|
304
|
+
:consume => false
|
277
305
|
}
|
278
306
|
end
|
279
307
|
end
|
@@ -341,6 +369,14 @@ module Citrus
|
|
341
369
|
# A Rule is an object that is used by a grammar to create matches on the
|
342
370
|
# Input during parsing.
|
343
371
|
module Rule
|
372
|
+
# Evaluates the given expression and creates a new rule object from it.
|
373
|
+
#
|
374
|
+
# Citrus::Rule.eval('"a" | "b"')
|
375
|
+
#
|
376
|
+
def self.eval(expr)
|
377
|
+
File.parse(expr, :root => :rule_body).value
|
378
|
+
end
|
379
|
+
|
344
380
|
# Returns a new Rule object depending on the type of object given.
|
345
381
|
def self.new(obj)
|
346
382
|
case obj
|
@@ -356,14 +392,6 @@ module Citrus
|
|
356
392
|
end
|
357
393
|
end
|
358
394
|
|
359
|
-
# Creates a new rule object from the given expression.
|
360
|
-
#
|
361
|
-
# Citrus::Rule.create('"a" | "b"')
|
362
|
-
#
|
363
|
-
def self.create(expr)
|
364
|
-
File.parse(expr, :root => :rule_body).value
|
365
|
-
end
|
366
|
-
|
367
395
|
@unique_id = 0
|
368
396
|
|
369
397
|
# Generates a new rule id.
|
@@ -391,7 +419,18 @@ module Citrus
|
|
391
419
|
# result from this rule. If +mod+ is a Proc, it is used to create an
|
392
420
|
# anonymous module.
|
393
421
|
def extension=(mod)
|
394
|
-
|
422
|
+
if Proc === mod
|
423
|
+
begin
|
424
|
+
tmp = Module.new(&mod)
|
425
|
+
raise ArgumentError unless tmp.instance_methods.any?
|
426
|
+
mod = tmp
|
427
|
+
rescue ArgumentError, NameError, NoMethodError
|
428
|
+
mod = Module.new { define_method(:value, &mod) }
|
429
|
+
end
|
430
|
+
end
|
431
|
+
|
432
|
+
raise ArgumentError unless Module === mod
|
433
|
+
|
395
434
|
@extension = mod
|
396
435
|
end
|
397
436
|
|
@@ -422,7 +461,7 @@ module Citrus
|
|
422
461
|
private
|
423
462
|
|
424
463
|
def extend_match(match, name)
|
425
|
-
match.
|
464
|
+
match.extend(extension) if extension
|
426
465
|
match.names << name if name
|
427
466
|
match
|
428
467
|
end
|
@@ -554,7 +593,7 @@ module Citrus
|
|
554
593
|
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
|
555
594
|
# no match can be made.
|
556
595
|
def match(input, offset=0)
|
557
|
-
create_match(rule.dup, offset) if
|
596
|
+
create_match(rule.dup, offset) if input[offset, rule.length] == rule
|
558
597
|
end
|
559
598
|
end
|
560
599
|
|
@@ -612,7 +651,7 @@ module Citrus
|
|
612
651
|
include Nonterminal
|
613
652
|
|
614
653
|
def initialize(rule='')
|
615
|
-
super([
|
654
|
+
super([rule])
|
616
655
|
end
|
617
656
|
|
618
657
|
# Returns the Rule object this rule uses to match.
|
@@ -663,6 +702,38 @@ module Citrus
|
|
663
702
|
end
|
664
703
|
end
|
665
704
|
|
705
|
+
# A ButPredicate is a Predicate that consumes all characters until its rule
|
706
|
+
# matches. It must match at least one character in order to succeed. The
|
707
|
+
# Citrus notation is any expression preceded by a tilde, e.g.:
|
708
|
+
#
|
709
|
+
# ~expr
|
710
|
+
#
|
711
|
+
class ButPredicate
|
712
|
+
include Predicate
|
713
|
+
|
714
|
+
DOT_RULE = Rule.new(DOT)
|
715
|
+
|
716
|
+
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
|
717
|
+
# no match can be made.
|
718
|
+
def match(input, offset=0)
|
719
|
+
matches = []
|
720
|
+
os = offset
|
721
|
+
while input.match(rule, os).nil?
|
722
|
+
m = input.match(DOT_RULE, os)
|
723
|
+
break unless m
|
724
|
+
matches << m
|
725
|
+
os += m.length
|
726
|
+
end
|
727
|
+
# Create a single match from the aggregate text value of all submatches.
|
728
|
+
create_match(matches.join, offset) if matches.any?
|
729
|
+
end
|
730
|
+
|
731
|
+
# Returns the Citrus notation of this rule as a string.
|
732
|
+
def to_s
|
733
|
+
'~' + rule.embed
|
734
|
+
end
|
735
|
+
end
|
736
|
+
|
666
737
|
# A Label is a Predicate that applies a new name to any matches made by its
|
667
738
|
# rule. The Citrus notation is any sequence of word characters (i.e.
|
668
739
|
# <tt>[a-zA-Z0-9_]</tt>) followed by a colon, followed by any other
|
@@ -673,30 +744,30 @@ module Citrus
|
|
673
744
|
class Label
|
674
745
|
include Predicate
|
675
746
|
|
676
|
-
def initialize(
|
747
|
+
def initialize(rule='', label='<label>')
|
677
748
|
super(rule)
|
678
|
-
self.
|
749
|
+
self.label = label
|
679
750
|
end
|
680
751
|
|
681
752
|
# Sets the name of this label.
|
682
|
-
def
|
683
|
-
@
|
753
|
+
def label=(label)
|
754
|
+
@label = label.to_sym
|
684
755
|
end
|
685
756
|
|
686
|
-
# The
|
687
|
-
attr_reader :
|
757
|
+
# The label this rule adds to all its matches.
|
758
|
+
attr_reader :label
|
688
759
|
|
689
760
|
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
|
690
761
|
# no match can be made. When a Label makes a match, it re-names the match to
|
691
762
|
# the value of its label.
|
692
763
|
def match(input, offset=0)
|
693
|
-
m =
|
694
|
-
extend_match(m,
|
764
|
+
m = input.match(rule, offset)
|
765
|
+
extend_match(m, label) if m
|
695
766
|
end
|
696
767
|
|
697
768
|
# Returns the Citrus notation of this rule as a string.
|
698
769
|
def to_s
|
699
|
-
|
770
|
+
label.to_s + ':' + rule.embed
|
700
771
|
end
|
701
772
|
end
|
702
773
|
|
@@ -722,10 +793,10 @@ module Citrus
|
|
722
793
|
class Repeat
|
723
794
|
include Predicate
|
724
795
|
|
725
|
-
def initialize(min=1, max=Infinity
|
796
|
+
def initialize(rule='', min=1, max=Infinity)
|
797
|
+
super(rule)
|
726
798
|
raise ArgumentError, "Min cannot be greater than max" if min > max
|
727
799
|
@range = Range.new(min, max)
|
728
|
-
super(rule)
|
729
800
|
end
|
730
801
|
|
731
802
|
# Returns the Match for this rule on +input+ at the given +offset+, +nil+ if
|
@@ -835,15 +906,16 @@ module Citrus
|
|
835
906
|
# The base class for all matches. Matches are organized into a tree where any
|
836
907
|
# match may contain any number of other matches. This class provides several
|
837
908
|
# convenient tree traversal methods that help when examining parse results.
|
838
|
-
class Match
|
909
|
+
class Match < String
|
839
910
|
def initialize(data, offset=0)
|
840
911
|
case data
|
841
912
|
when String
|
842
|
-
|
913
|
+
super(data)
|
843
914
|
when MatchData
|
844
|
-
|
915
|
+
super(data[0])
|
845
916
|
@captures = data.captures
|
846
917
|
when Array
|
918
|
+
super(data.join)
|
847
919
|
@matches = data
|
848
920
|
end
|
849
921
|
|
@@ -870,11 +942,6 @@ module Citrus
|
|
870
942
|
names.include?(name)
|
871
943
|
end
|
872
944
|
|
873
|
-
# An array of all extension modules of this match.
|
874
|
-
def extensions
|
875
|
-
@extensions ||= []
|
876
|
-
end
|
877
|
-
|
878
945
|
# An array of all sub-matches of this match.
|
879
946
|
def matches
|
880
947
|
@matches ||= []
|
@@ -886,31 +953,13 @@ module Citrus
|
|
886
953
|
@captures ||= []
|
887
954
|
end
|
888
955
|
|
889
|
-
# Returns the raw text value of this match, which may simply be an
|
890
|
-
# aggregate of the text of all sub-matches if this match is not #terminal?.
|
891
|
-
def text
|
892
|
-
@text ||= matches.inject('') {|s, m| s << m.text }
|
893
|
-
end
|
894
|
-
|
895
|
-
alias to_s text
|
896
|
-
|
897
|
-
# Returns the length of this match's #text value as an Integer.
|
898
|
-
def length
|
899
|
-
text.length
|
900
|
-
end
|
901
|
-
|
902
|
-
# Passes all arguments to the #text of this match.
|
903
|
-
def [](*args)
|
904
|
-
text.__send__(:[], *args)
|
905
|
-
end
|
906
|
-
|
907
956
|
# Returns an array of all sub-matches with the given +name+. If +deep+ is
|
908
957
|
# +false+, returns only sub-matches that are immediate descendants of this
|
909
958
|
# match.
|
910
959
|
def find(name, deep=true)
|
911
960
|
sym = name.to_sym
|
912
961
|
ms = matches.select {|m| m.has_name?(sym) }
|
913
|
-
|
962
|
+
matches.each {|m| ms.concat(m.find(name, deep)) } if deep
|
914
963
|
ms
|
915
964
|
end
|
916
965
|
|
@@ -927,41 +976,21 @@ module Citrus
|
|
927
976
|
matches.length == 0
|
928
977
|
end
|
929
978
|
|
930
|
-
#
|
931
|
-
def
|
932
|
-
|
933
|
-
end
|
934
|
-
|
935
|
-
alias eql? ==
|
936
|
-
|
937
|
-
private
|
938
|
-
|
939
|
-
def redefine_method_missing! # :nodoc:
|
940
|
-
instance_eval(<<-RUBY, __FILE__, __LINE__ + 1)
|
941
|
-
def method_missing(sym, *args)
|
942
|
-
if sym == :to_ary
|
943
|
-
original_method_missing(sym, *args)
|
944
|
-
else
|
945
|
-
m = first(sym)
|
946
|
-
return m if m
|
947
|
-
raise 'No match named "%s" in %s (%s)' % [sym, self, name]
|
948
|
-
end
|
949
|
-
end
|
950
|
-
RUBY
|
979
|
+
# Creates a new String object from the contents of this match.
|
980
|
+
def to_s
|
981
|
+
String.new(self)
|
951
982
|
end
|
952
983
|
|
953
|
-
alias original_method_missing method_missing
|
954
|
-
|
955
|
-
public
|
956
|
-
|
957
984
|
# Allows sub-matches of this match to be retrieved by name as instance
|
958
985
|
# methods.
|
959
986
|
def method_missing(sym, *args)
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
987
|
+
m = first(sym)
|
988
|
+
return m if m
|
989
|
+
raise 'No match named "%s" in %s (%s)' % [sym, self, name]
|
990
|
+
end
|
991
|
+
|
992
|
+
def to_ary
|
993
|
+
# This method intentionally left blank to work around a bug in Ruby 1.9.
|
965
994
|
end
|
966
995
|
end
|
967
996
|
end
|