regexador 0.4.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +385 -0
- data/lib/chars.rb +22 -0
- data/lib/keywords.rb +22 -0
- data/lib/predefs.rb +49 -0
- data/lib/regexador.rb +79 -0
- data/lib/regexador_parser.rb +113 -0
- data/lib/regexador_xform.rb +180 -0
- data/spec/parsing_spec.rb +174 -0
- data/spec/programs_spec.rb +2928 -0
- data/spec/testing.rb +35 -0
- data/test/test.rb +39 -0
- metadata +109 -0
data/lib/regexador.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# Forward declaration only: the parser and transform files required below
# abort unless a Regexador constant already exists, and they nest their
# classes under it. The real implementation follows the requires.
class Regexador; end
|
4
|
+
|
5
|
+
require_relative './regexador_parser'
|
6
|
+
require_relative './regexador_xform'
|
7
|
+
|
8
|
+
require 'parslet/convenience'
|
9
|
+
|
10
|
+
# Compiles a Regexador program into a Ruby Regexp and matches strings
# against it.
class Regexador
  # Parses +str+, transforms the parse tree and compiles the resulting
  # regular-expression text.
  #
  # str   - source text of the program.
  # debug - when true, echoes the source, the parse tree and the transformed
  #         tree to stdout, and parses with +parse_with_debug+ instead of
  #         +parse+.
  def initialize(str, debug=false)
    @code = str
    if debug
      puts
      puts "---- Code: ------"
      puts str
      puts "-----------------"
    end

    @parser = Parser.new
    meth = debug ? :parse_with_debug : :parse
    @tree = @parser.send(meth, str)

    xform = Transform.new
    if debug
      puts "\n\nParser gives:"
      pp @tree
    end

    @regex_tree = xform.apply(@tree)
    @regex_str = @regex_tree.to_s
    if debug
      puts "\n\nTransform gives:"
      pp @regex_tree
    end

    @regex = Regexp.compile(@regex_tree.to_s)
  end

  # Returns the compiled Regexp (reflecting any parameters substituted by an
  # earlier call to #match).
  def to_regex
    @regex
  end

  # Matches +str+ against the compiled expression.
  #
  # hash - optional parameter values; each "(name){0}" placeholder in the
  #        regex text is replaced by the corresponding value and the regex is
  #        recompiled.
  #
  # Returns nil when there is no match. Otherwise returns an object with one
  # reader method per named capture, plus #[] which takes any number of
  # capture names and returns the captured strings as an array.
  def match(str, hash={})
    hash.each_pair do |var, val|
      # NOTE(review): gsub! edits the stored template in place, so once a
      # placeholder has been filled in it stays filled in on later calls.
      @regex_str.gsub!(/\(#{var}\)\{0\}/, val)
    end
    @regex = Regexp.compile(@regex_str) unless hash.empty?
    result = @regex.match(str)
    return nil if result.nil?

    # Logic below may change...

    captures = Object.new
    result.names.each do |name|
      captures.define_singleton_method(name) { result[name] }
    end
    # Look each requested capture up by the name that was asked for (the
    # block argument), not by some unrelated outer identifier.
    captures.define_singleton_method(:[]) do |*args|
      args.map {|cvar| result[cvar] }
    end
    captures
  end

  # Same as #match but returns true or false.
  def match?(str, hash={})
    !!match(str, hash)   # Return Boolean
  end

  # Operator form of #match. Accepts a String or anything responding to
  # +to_str+; raises ArgumentError for everything else.
  def =~(other)
    other = stringify(other)
    raise ArgumentError unless String === other
    match(other)
  end

  private

  # Returns +obj+ as a String when it is one or can be implicitly converted
  # via +to_str+; otherwise returns +obj+ unchanged.
  def stringify(obj)
    return obj if String === obj
    return obj.to_str if obj.respond_to?(:to_str)
    obj
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
# This file reopens Regexador, so the top-level file must have defined it first.
abort "Require out of order" unless defined?(Regexador)
|
4
|
+
|
5
|
+
# Declare the parser class up front so the files required next can reopen it.
class Regexador::Parser < Parslet::Parser; end
|
7
|
+
|
8
|
+
require_relative './chars' # These three files
|
9
|
+
require_relative './predefs' # reopen the class
|
10
|
+
require_relative './keywords' # Regexador::Parser
|
11
|
+
|
12
|
+
# Grammar rules for the Regexador language. The c* (character) and k*
# (keyword) atoms and +predef+/+keyword+ used below are not defined here;
# they come from the files that reopen this class.
class Regexador::Parser
  # --- Whitespace, comments and line ends --------------------------------

  rule(:space)         { match[" \t"].repeat(1) }
  rule(:space?)        { space.maybe }
  # NOTE(review): inside double quotes Ruby reads "\s" as one space
  # character, so the second alternative matches a literal space only, not
  # the regex class \s -- confirm that is what was intended.
  rule(:white)         { (endofline | match("\s")).repeat(1) }
  rule(:white?)        { white.maybe }

  rule(:lower)         { match('[a-z]') }
  rule(:upper)         { match('[A-Z]') }

  # A comment is a hash sign, at least one space or tab, then everything up
  # to (not including) the newline.
  rule(:comment)       { cHASH >> space >> (cNEWLINE.absent? >> any).repeat(0) }
  rule(:endofline)     { space? >> comment.maybe >> cNEWLINE }

  # --- Lexical pieces -----------------------------------------------------

  rule(:digit)         { match('[0-9]') }
  rule(:digits)        { digit.repeat(1) }
  # Hex digits in either case; the transform converts them with Integer(),
  # which accepts both.
  rule(:hexdigit)      { digit | match('[a-fA-F]') }
  rule(:quoted)        { match('[^"]').repeat(0) }
  rule(:single_quoted) { match("[^']").repeat(0) }
  rule(:graph_char)    { match("[[:graph:]]") }   # { match('[!-~]') }
  # A name starts with a lowercase letter and may not begin with a keyword.
  rule(:name)          { keyword.absent? >> lower >> (lower | cUNDERSCORE | digit).repeat(0) }

  rule(:variable)      { name.as(:var) }
  rule(:capture_var)   { (cAT >> name.as(:cvar)) }
  rule(:parameter)     { (cCOLON >> name.as(:param)) }

  rule(:posix_class)   { cPERCENT >> name.as(:pclass) }

  rule(:string)        { cQUOTE >> quoted.as(:string) >> cQUOTE }

  rule(:simple_class)  { cSQUOTE >> single_quoted.as(:char_class) >> cSQUOTE }
  rule(:negated_class) { cTILDE >> cSQUOTE >> single_quoted.as(:neg_class) >> cSQUOTE }
  rule(:char_class)    { simple_class | negated_class }

  rule(:number)        { digits }
  rule(:numeric)       { number | variable | parameter }

  # An ampersand followed by exactly four hex digits.
  rule(:codepoint)     { cAMPERSAND >> (hexdigit >> hexdigit >> hexdigit >> hexdigit).as(:unicode) }

  rule(:char)          { (cTICK >> graph_char.as(:char)) | codepoint }

  rule(:simple_range)  { char.as(:c1) >> cHYPHEN >> char.as(:c2) }
  rule(:negated_range) { char.as(:nr1) >> cTILDE >> char.as(:nr2) }
  rule(:range)         { negated_range | simple_range }

  rule(:negated_char)  { cTILDE >> char.as(:nchar) }    # ~`x means /[^x]/

  # "@name" alone, or "@name = pattern".
  rule(:capture)       { capture_var.as(:lhs) >> space? >> (cEQUAL >> space? >> pattern.as(:rhs)).maybe }

  # --- Patterns -----------------------------------------------------------

  rule(:simple_pattern) { predef | range | negated_char | posix_class | string |
                          # X      `a-`c    ~`a            %name         "abc"
                          char_class | char | parameter | variable | capture }
                          # 'abc'      `a     :param      xyz        @xyz = ...

  rule(:qualifier)     { (kANY | kMANY | kMAYBE | kNOCASE | kWITHIN | kESCAPING).as(:qualifier) >>
                         fancy_pattern.as(:match_item) }

  # FIXME above: within and escaping can't really take an arbitrary pattern

  ###
  rule(:pos_lookahead)  { kFIND >> space >> simple_pattern.as(:findpat_ahead) >> space >>
                          kWITH >> space >> simple_pattern.as(:pospat) }
  rule(:neg_lookahead)  { kFIND >> space >> simple_pattern.as(:findpat_ahead) >> space >>
                          kWITHOUT >> space >> simple_pattern.as(:negpat) }
  rule(:pos_lookbehind) { kWITH >> space >> simple_pattern.as(:pospat) >> space >>
                          kFIND >> space >> simple_pattern.as(:findpat_behind) }
  rule(:neg_lookbehind) { kWITHOUT >> space >> simple_pattern.as(:negpat) >> space >>
                          kFIND >> space >> simple_pattern.as(:findpat_behind) }
  rule(:lookaround)     { pos_lookahead | neg_lookahead | pos_lookbehind | neg_lookbehind }
  ###

  # "n * pattern" or "n,m * pattern"; repeat2 is tried first because its
  # prefix is also a valid repeat1.
  rule(:repeat1)       { numeric.as(:num1) }
  rule(:repeat2)       { repeat1 >> cCOMMA >> numeric.as(:num2) }
  rule(:repetition)    { (repeat2 | repeat1) >> space? >> cTIMES >> space? >> fancy_pattern.as(:match_item) }

  rule(:parenthesized) { cLPAREN >> space? >> pattern >> space? >> cRPAREN }

  rule(:fancy_pattern) { space? >> (repetition | simple_pattern | qualifier | lookaround | parenthesized) >> space? }
  #                                 num          `~"'             keyword     find/with    (

  rule(:concat)        { (fancy_pattern >> (space? >> fancy_pattern).repeat(0)).as(:sequence) }

  rule(:pattern)       { (concat >> space? >> (cBAR >> space? >> concat).repeat(0)).as(:alternation) }

  # --- Program structure --------------------------------------------------

  rule(:rvalue)        { pattern | numeric }   # a string is-a pattern

  rule(:assignment)    { space? >> name.as(:var) >> space? >> cEQUAL >> space? >> rvalue.as(:rvalue) }

  rule(:definitions)   { (endofline | assignment >> endofline).repeat(0) }

  rule(:oneline_clause) { space? >> kMATCH >> space? >> pattern >> kEND >> endofline.maybe }

  rule(:single_line)   { endofline | space? >> pattern >> endofline }

  rule(:multiline_clause) { space? >> kMATCH >> endofline >> single_line.repeat(1).as(:lines) >> space? >>
                            kEND >> endofline.maybe }

  rule(:match_clause)  { multiline_clause | oneline_clause }

  rule(:program)       { definitions.as(:definitions) >> match_clause.as(:match) >> endofline.repeat(0) }

  root(:program)
end
|
113
|
+
|
@@ -0,0 +1,180 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
# This file reopens Regexador, so the top-level file must have defined it first.
abort "Require out of order" unless defined?(Regexador)
|
4
|
+
|
5
|
+
class Regexador::Transform < Parslet::Transform
|
6
|
+
# Base class for the objects the transform produces. Each concrete node
# type is created with Node.make and renders itself as regex text via #to_s.
class Node
  # Builds a new Node subclass.
  #
  # fields - attribute names; each gets a reader and a writer, and
  #          positional constructor arguments are assigned to them in order.
  # block  - body of the subclass's #to_s, evaluated on the instance. When
  #          omitted, the subclass keeps the base #to_s, which raises
  #          NotImplementedError.
  #
  # Returns the new class.
  def self.make(*fields, &block)
    ::Class.new(self) do
      fields.each {|field| attr_accessor field }
      define_method(:fields) { fields.dup }
      # define_method rejects a nil block, so only override when one is given.
      define_method(:to_s, &block) if block
    end
  end

  # Assigns +values+ to the declared fields by position; fields without a
  # matching value are set to nil.
  def initialize(*values)
    fields.zip(values) {|f, v| send("#{f}=", v) }
  end

  # Subclasses are expected to supply the regex text.
  def to_s
    raise NotImplementedError,
      "Please implement #to_s for #{short_name}."
  end

  # Lets a node be used wherever an implicit String conversion happens.
  def to_str
    to_s
  end

  # The class name without its enclosing namespaces. Also works for
  # top-level classes, and falls back to the class's inspect string for
  # anonymous ones (whose name is nil).
  def short_name
    full_name = self.class.name
    return self.class.inspect if full_name.nil?
    full_name.split('::').last
  end

  # Renders as "ShortName(field=value, ...)" using each value's #inspect.
  # Deliberately avoids #to_s, which is reserved for regex generation.
  def inspect
    data = fields.map {|f| "#{f}=#{send(f).inspect}" }.join(', ')
    short_name + "(" + data + ")"
  end
end
|
39
|
+
|
40
|
+
# Later: Remember escaping for chars (char, c1, c2, nchar, ...)
|
41
|
+
|
42
|
+
# Node types. Each block is the node's #to_s and returns regex text;
# interpolating a child node calls that child's #to_s in turn.

# A single literal character, escaped for use in a regex.
XChar = Node.make(:char) do
  Regexp.escape(char)
end

CharRange    = Node.make(:c1, :c2)    { "[#{@c1}-#{@c2}]" }
NegatedRange = Node.make(:nr1, :nr2)  { "[^#{@nr1}-#{@nr2}]" }
NegatedChar  = Node.make(:nchar)      { "[^#{@nchar}]" }   # More like a range really
POSIXClass   = Node.make(:pclass)     { "[[:#{@pclass}:]]" }
CharClass    = Node.make(:char_class) { "[#{@char_class}]" }
NegatedClass = Node.make(:neg_class)  { "[^#{@neg_class}]" }

# A predefined token, looked up in the parser's Predef2Regex table under
# the key "p" + token name.
Predefined = Node.make(:pre) do
  sym = "p#{@pre}".to_sym
  str = Regexador::Parser::Predef2Regex[sym]
  raise "#{@pre} is not handled yet" if str.nil?
  str
end

StringNode = Node.make(:string) { Regexp.escape(string.to_s) }
Repeat1    = Node.make(:num1, :match_item) { "(#{@match_item}){#{@num1}}" }
Repeat2    = Node.make(:num1, :num2, :match_item) { "(#{@match_item}){#{@num1},#{@num2}}" }
Any        = Node.make(:match_item) { "(#{@match_item})*" }
Many       = Node.make(:match_item) { "(#{@match_item})+" }
Maybe      = Node.make(:match_item) { "(#{@match_item})?" }
Nocase     = Node.make(:match_item) { "((?i)#{@match_item})" }

# Lookaround nodes. Each one interpolates exactly the fields it declares
# (findpat_ahead / findpat_behind), so the pattern being looked for is
# actually emitted.
FindWith    = Node.make(:findpat_ahead, :pospat)  { "((?=#{@findpat_ahead}#{@pospat})#{@findpat_ahead})" }
FindWithout = Node.make(:findpat_ahead, :negpat)  { "((?!#{@findpat_ahead}#{@negpat})#{@findpat_ahead})" }
WithFind    = Node.make(:pospat, :findpat_behind) { "((?<=#{@pospat})#{@findpat_behind})" }
WithoutFind = Node.make(:negpat, :findpat_behind) { "((?<!#{@negpat})#{@findpat_behind})" }

Within   = Node.make(:delim) { "(#{@delim}.*?#{@delim})" }   # /x[^y]*?y/
Escaping = Node.make(:delim) { "\\#{@delim}|[^#{@delim}]*?#{@delim}" }
# escaping `"                 # /"(\\"|[^"])*?"/

Sequence    = Node.make(:elements) { elements.map(&:to_s).join }
Alternation = Node.make(:elements) { '(' + elements.map(&:to_s).join('|') + ')' }

Assignment = Node.make(:var, :rvalue) { "" }   # Doesn't actually translate directly.
# A reference to a variable: renders whatever is bound to it. An unknown
# name, or no bindings stored at all, renders as the empty string.
Usage = Node.make(:var) { (Assignment.bindings || {})[var.to_s].to_s }

Program = Node.make(:definitions, :match) do
  # NOTE Since we're using to_s for conversion to regular expression,
  # debugging cannot be done using string interpolation, otherwise we
  # call things out of order just by debug-printing them!
  #
  #   puts "In Program: #{match}"          # Don't do this
  #   puts "In Program: #{match.inspect}"  # But this is OK
  definitions.each {|d| d.store }
  match.to_s
end

class Assignment < Node   # For clarity: Really already is-a Node
  class << self
    # Variable name => bound node. Held on the class itself, so the table
    # is shared by every program transformed in this process.
    attr_accessor :bindings
  end

  # Records this assignment in the class-level bindings table, creating the
  # table on first use.
  def store
    # puts "Storing #@var = #{@rvalue.inspect}"
    hash = self.class.bindings ||= {}

    hash[@var.to_s] = @rvalue            # Late binding
    # hash[@var.to_s] = @rvalue.to_s     # Early binding
    # Think about the difference... :)
  end
end

Captured = Node.make(:cname, :pattern) { "(?<#{@cname}>#{@pattern})" }
Backref  = Node.make(:name) { "\\k<#{@name}>" }

# A zero-width placeholder that Regexador#match later replaces by text.
Parameter = Node.make(:param) { "(#{param}){0}" }

PosAhead  = Node.make(:pla1, :pla2) { "(?=#{@pla1}#{@pla2})#{@pla1}" }
NegAhead  = Node.make(:nla1, :nla2) { "(?!#{@nla1}#{@nla2})#{@nla1}" }
PosBehind = Node.make(:plb1, :plb2) { "(?<=#{@plb1})#{@plb2}" }
NegBehind = Node.make(:nlb1, :nlb2) { "(?<!#{@nlb1})#{@nlb2}" }
|
117
|
+
|
118
|
+
# Actual transformation rules
|
119
|
+
|
120
|
+
# Rules are declared in the same order as before; only the notation differs.

# Single characters and strings
rule(char: simple(:ch))      { XChar.new(ch) }
rule(unicode: simple(:hex4)) { StringNode.new("" << Integer("0x#{hex4}")) }

rule(string: simple(:string)) { StringNode.new(string) }
# When the string is empty, parslet returns an empty array for lack of content.
# Map that to the empty string node.
rule(string: sequence(:string)) { StringNode.new('') }

# Ranges and classes
rule(c1: simple(:c1), c2: simple(:c2)) { CharRange.new(c1, c2) }

rule(nr1: simple(:nr1), nr2: simple(:nr2)) { NegatedRange.new(nr1, nr2) }
rule(nchar: simple(:nchar)) { NegatedChar.new(nchar) }   # Don't forget escaping

rule(pclass: simple(:pclass)) { POSIXClass.new(pclass) }

rule(char_class: simple(:char_class)) { CharClass.new(char_class) }
rule(neg_class: simple(:neg_class))   { NegatedClass.new(neg_class) }

rule(predef: simple(:content)) { Predefined.new(content) }

# Repetition
rule(num1: simple(:num1), match_item: simple(:match_item)) do
  Repeat1.new(num1, match_item)
end

rule(num1: simple(:num1), num2: simple(:num2), match_item: simple(:match_item)) do
  Repeat2.new(num1, num2, match_item)
end

# Qualifiers
rule(qualifier: 'any',      match_item: simple(:match_item)) { Any.new(match_item) }
rule(qualifier: 'many',     match_item: simple(:match_item)) { Many.new(match_item) }
rule(qualifier: 'maybe',    match_item: simple(:match_item)) { Maybe.new(match_item) }
rule(qualifier: 'nocase',   match_item: simple(:match_item)) { Nocase.new(match_item) }
rule(qualifier: 'within',   match_item: simple(:match_item)) { Within.new(match_item) }
rule(qualifier: 'escaping', match_item: simple(:match_item)) { Escaping.new(match_item) }

# Lookaround
rule(findpat_ahead: simple(:pla1), pospat: simple(:pla2))  { PosAhead.new(pla1, pla2) }
rule(findpat_ahead: simple(:nla1), negpat: simple(:nla2))  { NegAhead.new(nla1, nla2) }
rule(pospat: simple(:plb1), findpat_behind: simple(:plb2)) { PosBehind.new(plb1, plb2) }
rule(negpat: simple(:nlb1), findpat_behind: simple(:nlb2)) { NegBehind.new(nlb1, nlb2) }

# Definitions and parameters
rule(var: simple(:var), rvalue: simple(:rvalue)) { Assignment.new(@var, @rvalue) }

rule(param: simple(:param)) { Parameter.new(param) }

# An alternation or sequence with a single member collapses to that member.
rule(alternation: simple(:pattern))        { pattern }
rule(alternation: sequence(:alternatives)) { Alternation.new(alternatives) }

rule(sequence: simple(:element))    { element }
rule(sequence: sequence(:elements)) { Sequence.new(elements) }

# A series of statements on different lines is also a sequence.
rule(lines: sequence(:lines)) { Sequence.new(lines) }

rule(var: simple(:name)) { Usage.new(name) }

# Whole program
rule(definitions: sequence(:definitions), match: simple(:match)) do
  Program.new(definitions, match)
end
rule(definitions: sequence(:definitions), match: sequence(:match)) do
  Program.new(definitions, match)
end

# An expression of the form '@variable'
rule(lhs: { cvar: simple(:backref) }) { Backref.new(backref) }

# An expression of the form '@variable = expr'
rule(lhs: { cvar: simple(:cname) }, rhs: simple(:pattern)) { Captured.new(cname, pattern) }
|
179
|
+
end
|
180
|
+
|
@@ -0,0 +1,174 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
require './spec/testing'
|
3
|
+
|
4
|
+
class Object
  # Spec helper: an expectation that the receiver is not nil, so a parse
  # result can be checked with `result.succeeds`.
  def succeeds
    should_not == nil
  end
end
|
9
|
+
|
10
|
+
describe Regexador do
  # A single parser instance is built once and shared by all examples.
  before(:all) do
    @parser = Regexador::Parser.new
    @pattern = @parser.pattern
  end

  describe "A special character" do
    it "can be matched correctly" do
      @parser.cSQUOTE.parse_with_debug("'").succeeds
      { :cHASH => '#', :cNEWLINE => "\n", :cEQUAL => '=' }.each do |rule_name, text|
        @parser.send(rule_name).parse(text).succeeds
      end
    end
  end

  describe "An international character" do
    it "can follow a backtick" do
      @parser.char.parse_with_debug("`æ").succeeds
      %w[`ß `ç `ö `ñ].each {|src| @parser.char.parse(src).succeeds }
    end
  end

  describe "A Unicode codepoint expression" do
    it "can be matched" do
      @parser.codepoint.parse_with_debug("&1234").succeeds
      @parser.codepoint.parse('&beef').succeeds
    end
  end

  describe "A predefined token" do
    %w[BOS EOS START END].each do |token|
      describe token do
        it 'matches using pattern' do
          @parser.pattern.parse_with_debug(token).succeeds
        end
      end
    end
  end

  describe "An assignment" do
    it "can be parsed" do
      # Spacing around "=" is optional; the right side may be a number,
      # a class, a string or a range.
      [
        "a = 5",
        "a= 5",
        "a =5",
        "a=5",
        "myvar = 'xyz'",
        'var2 = "hello"',
        'this_var = `x-`z'
      ].each {|src| @parser.assignment.parse(src).succeeds }
      @parser.assignment.parse_with_debug('pat = maybe many `x-`z').succeeds
    end
  end

  describe "A keyword used as a variable name" do
    it "will not parse" do
      @parser.assignment.should_not parse("end = 'hello'")
      # @parser.assignment.parse("endx = 'hello'")
    end
  end

  describe "A definition section" do
    it "can be parsed" do
      two_lines = "a = 5\nstr = \"hello\"\n"
      @parser.definitions.parse_with_debug(two_lines).succeeds

      # Definitions mixed with comments and an empty line.
      with_comments = <<-EOF
a = 5
# comment...
pat = maybe many `a-`c
# empty line follows:

str = "hello"
# another comment...
      EOF
      @parser.definitions.parse_with_debug(with_comments).succeeds
    end
  end

  describe "A capture variable" do
    it "can be parsed" do
      @parser.capture_var.parse("@myvar").succeeds
    end
  end

  describe "A captured pattern" do
    let(:prog) { "@myvar = maybe 'abc'" }

    it "can be parsed (#capture)" do
      @parser.capture.parse(prog).succeeds
    end

    it "can be parsed (#program)" do
      @parser.parse("match #{prog} end").succeeds
    end
  end

  describe "A back reference" do
    let(:prog) { '@myvar' }

    it 'can be parsed (#capture)' do
      @parser.capture.parse(prog).succeeds
    end

    it 'can be parsed' do
      @parser.parse("match #{prog} end").succeeds
    end
  end


  describe "A one-line match clause" do
    it "can be parsed" do
      clause = <<-EOF
match `a~`x end
      EOF
      @parser.match_clause.parse_with_debug(clause).succeeds
    end
  end

  describe "A multiline match clause" do
    it "can be parsed" do
      clause = <<-EOF
match
`< "tag" WB
any ~`>
# blah blah blah
"</" "tag" `>
end
      EOF
      @parser.multiline_clause.parse_with_debug(clause).succeeds
    end
  end

  describe "An entire one-line program" do
    it "can be parsed" do
      @parser.parse_with_debug("match `a-`f end").succeeds
    end
  end


  describe "An entire program" do
    it "can be parsed" do
      # Dotted-quad style program built from two definitions.
      dotted_quad = <<-EOF
dot = "."
num = "25" D5 | `2 D4 D | maybe D1 1,2*D
match WB num dot num dot num dot num WB end
      EOF
      @parser.program.parse_with_debug(dotted_quad).succeeds

      # Several definitions combined by alternation in the match clause.
      card_numbers = <<-EOF
# Warning: This one likely has errors!

visa = `4 12*D maybe 3*D
mc = `5 D5 14*D
amex = `3 '47' 13*D
diners = `3 (`0 D5 | '68' D) 11*D
discover = `6 ("011" | `5 2*D) 12*D
jcb = ("2131"|"1800"|"35" 3*D) 11*D

match visa | mc | amex | diners | discover | jcb end
      EOF
      @parser.program.parse_with_debug(card_numbers).succeeds
    end
  end
end
|
174
|
+
|