regexador 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +385 -0
- data/lib/chars.rb +22 -0
- data/lib/keywords.rb +22 -0
- data/lib/predefs.rb +49 -0
- data/lib/regexador.rb +79 -0
- data/lib/regexador_parser.rb +113 -0
- data/lib/regexador_xform.rb +180 -0
- data/spec/parsing_spec.rb +174 -0
- data/spec/programs_spec.rb +2928 -0
- data/spec/testing.rb +35 -0
- data/test/test.rb +39 -0
- metadata +109 -0
data/lib/regexador.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
class Regexador
|
2
|
+
# Only a skeleton...
|
3
|
+
end
|
4
|
+
|
5
|
+
require_relative './regexador_parser'
|
6
|
+
require_relative './regexador_xform'
|
7
|
+
|
8
|
+
require 'parslet/convenience'
|
9
|
+
|
10
|
+
# Compiles a Regexador program into a Ruby Regexp and matches strings with it.
class Regexador
  # Parse +str+ as a Regexador program, transform the parse tree into
  # regular-expression source text and compile that text.
  #
  # str   - the program source.
  # debug - when true, echo the source, the parse tree and the transformed
  #         tree to stdout, and parse via +parse_with_debug+ instead of +parse+.
  def initialize(str, debug=false)
    @code = str
    if debug
      puts
      puts "---- Code: ------"
      puts str
      puts "-----------------"
    end

    @parser = Parser.new
    meth = debug ? :parse_with_debug : :parse
    @tree = @parser.send(meth, str)

    xform = Transform.new
    if debug
      puts "\n\nParser gives:"
      pp @tree
    end

    @regex_tree = xform.apply(@tree)
    # Kept untouched as the template for parameter substitution in #match.
    @regex_str = @regex_tree.to_s
    if debug
      puts "\n\nTransform gives:"
      pp @regex_tree
    end

    @regex = Regexp.compile(@regex_str)
  end

  # Returns the most recently compiled Regexp.
  def to_regex
    @regex
  end

  # Match +str+ against the compiled expression.
  #
  # hash - optional parameter values (name => value). Each "(name){0}"
  #        placeholder in the regex source is replaced by the value's string
  #        form. Values accumulate across calls and may be overridden by a
  #        later call; the regex compiled from them stays in effect for
  #        subsequent calls that pass no parameters.
  #
  # Returns nil when there is no match. Otherwise returns an object with one
  # reader method per named capture, plus +[]+ which takes any number of
  # capture names and returns the array of their captured values.
  def match(str, hash={})
    unless hash.empty?
      @params ||= {}
      hash.each_pair {|var, val| @params[var.to_s] = val.to_s }

      # Substitute into a copy so the placeholders survive for the next call.
      # The block form inserts the value literally (no backslash processing).
      source = @params.reduce(@regex_str) do |src, (var, val)|
        src.gsub(/\(#{Regexp.escape(var)}\)\{0\}/) { val }
      end
      @regex = Regexp.compile(source)
    end

    result = @regex.match(str)
    return nil if result.nil?

    # Logic below may change...

    obj = Object.new
    result.names.each do |name|
      obj.define_singleton_method(name) { result[name] }
    end
    obj.define_singleton_method(:[]) {|*args| args.map {|cvar| result[cvar] } }
    obj
  end

  # Same as #match, but returns true or false.
  def match?(str, hash={})
    !!match(str, hash)   # Return Boolean
  end

  # Operator form of #match. +other+ must be a String or respond to +to_str+;
  # anything else raises ArgumentError.
  def =~(other)
    other = stringify(other)
    raise ArgumentError unless String === other
    match(other)
  end

  private

  # Returns +obj+ itself when it is a String, its +to_str+ conversion when it
  # offers one, and +obj+ unchanged otherwise.
  def stringify(obj)
    return obj if String === obj
    return obj.to_str if obj.respond_to?(:to_str)
    obj
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
abort "Require out of order" if ! defined? Regexador
|
4
|
+
|
5
|
+
class Regexador::Parser < Parslet::Parser
|
6
|
+
end
|
7
|
+
|
8
|
+
require_relative './chars' # These three files
|
9
|
+
require_relative './predefs' # reopen the class
|
10
|
+
require_relative './keywords' # Regexador::Parser
|
11
|
+
|
12
|
+
# Grammar rules for the Regexador language. The cXXX (character), kXXX
# (keyword), +keyword+ and +predef+ rules used below are not defined in this
# class body; the file that reopens this class requires chars.rb, predefs.rb
# and keywords.rb beforehand.
class Regexador::Parser
  # ---- Whitespace, comments, line ends ----

  rule(:space)  { match[" \t"].repeat(1) }
  rule(:space?) { space.maybe }

  # NOTE(review): in a double-quoted Ruby string "\s" is a literal blank, so
  # this rule accepts line ends and single blanks only -- confirm whether the
  # regex class \s was intended.
  rule(:white)  { (endofline | match("\s")).repeat(1) }
  rule(:white?) { white.maybe }

  # A comment is a hash, mandatory blank space, then everything up to (but
  # not including) the newline.
  rule(:comment)   { cHASH >> space >> (cNEWLINE.absent? >> any).repeat }
  rule(:endofline) { space? >> comment.maybe >> cNEWLINE }

  # ---- Lexical building blocks ----

  rule(:lower)         { match('[a-z]') }
  rule(:upper)         { match('[A-Z]') }
  rule(:digit)         { match('[0-9]') }
  rule(:digits)        { digit.repeat(1) }
  rule(:hexdigit)      { digit | match("[abcdef]") }
  rule(:quoted)        { match('[^"]').repeat }
  rule(:single_quoted) { match("[^']").repeat }
  rule(:graph_char)    { match("[[:graph:]]") }   # { match('[!-~]') }

  # An identifier: a lowercase letter followed by lowercase letters,
  # underscores or digits, provided a keyword does not match at this position.
  rule(:name) do
    keyword.absent? >> lower >> (lower | cUNDERSCORE | digit).repeat
  end

  # ---- Names in their different roles ----

  rule(:variable)    { name.as(:var) }
  rule(:capture_var) { (cAT >> name.as(:cvar)) }
  rule(:parameter)   { (cCOLON >> name.as(:param)) }
  rule(:posix_class) { cPERCENT >> name.as(:pclass) }

  # ---- Literals ----

  rule(:string) { cQUOTE >> quoted.as(:string) >> cQUOTE }

  rule(:simple_class)  { cSQUOTE >> single_quoted.as(:char_class) >> cSQUOTE }
  rule(:negated_class) { cTILDE >> cSQUOTE >> single_quoted.as(:neg_class) >> cSQUOTE }
  rule(:char_class)    { simple_class | negated_class }

  rule(:number)  { digits }
  rule(:numeric) { number | variable | parameter }

  # Ampersand followed by exactly four hex digits.
  rule(:codepoint) do
    cAMPERSAND >> (hexdigit >> hexdigit >> hexdigit >> hexdigit).as(:unicode)
  end

  rule(:char) { (cTICK >> graph_char.as(:char)) | codepoint }

  rule(:simple_range)  { char.as(:c1) >> cHYPHEN >> char.as(:c2) }
  rule(:negated_range) { char.as(:nr1) >> cTILDE >> char.as(:nr2) }
  rule(:range)         { negated_range | simple_range }

  rule(:negated_char) { cTILDE >> char.as(:nchar) }   # ~`x  means  /[^x]/

  # A capture variable, optionally followed by "= pattern".
  rule(:capture) do
    capture_var.as(:lhs) >> space? >>
      (cEQUAL >> space? >> pattern.as(:rhs)).maybe
  end

  # ---- Patterns ----

  # Alternatives are tried in the order listed.
  rule(:simple_pattern) do
    predef       |   # X
    range        |   # `a-`c
    negated_char |   # ~`a
    posix_class  |   # %name
    string       |   # "abc"
    char_class   |   # 'abc'
    char         |   # `a
    parameter    |   # :param
    variable     |   # xyz
    capture          # @xyz = ...
  end

  rule(:qualifier) do
    (kANY | kMANY | kMAYBE | kNOCASE | kWITHIN | kESCAPING).as(:qualifier) >>
      fancy_pattern.as(:match_item)
  end

  # FIXME above: within and escaping can't really take an arbitrary pattern

  ###
  rule(:pos_lookahead) do
    kFIND >> space >> simple_pattern.as(:findpat_ahead) >> space >>
      kWITH >> space >> simple_pattern.as(:pospat)
  end

  rule(:neg_lookahead) do
    kFIND >> space >> simple_pattern.as(:findpat_ahead) >> space >>
      kWITHOUT >> space >> simple_pattern.as(:negpat)
  end

  rule(:pos_lookbehind) do
    kWITH >> space >> simple_pattern.as(:pospat) >> space >>
      kFIND >> space >> simple_pattern.as(:findpat_behind)
  end

  rule(:neg_lookbehind) do
    kWITHOUT >> space >> simple_pattern.as(:negpat) >> space >>
      kFIND >> space >> simple_pattern.as(:findpat_behind)
  end

  rule(:lookaround) { pos_lookahead | neg_lookahead | pos_lookbehind | neg_lookbehind }
  ###

  # "n * pattern" or "n,m * pattern"; the two-number form is tried first.
  rule(:repeat1) { numeric.as(:num1) }
  rule(:repeat2) { repeat1 >> cCOMMA >> numeric.as(:num2) }
  rule(:repetition) do
    (repeat2 | repeat1) >> space? >> cTIMES >> space? >>
      fancy_pattern.as(:match_item)
  end

  rule(:parenthesized) { cLPAREN >> space? >> pattern >> space? >> cRPAREN }

  rule(:fancy_pattern) do
    space? >>
      (repetition    |   # num
       simple_pattern |  # `~"'
       qualifier     |   # keyword
       lookaround    |   # find/with
       parenthesized) >> # (
      space?
  end

  rule(:concat) do
    (fancy_pattern >> (space? >> fancy_pattern).repeat).as(:sequence)
  end

  rule(:pattern) do
    (concat >> space? >> (cBAR >> space? >> concat).repeat).as(:alternation)
  end

  # ---- Definitions ----

  rule(:rvalue) { pattern | numeric }   # a string is-a pattern

  rule(:assignment) do
    space? >> name.as(:var) >> space? >> cEQUAL >> space? >> rvalue.as(:rvalue)
  end

  # Any number of lines, each either empty/comment-only or an assignment.
  rule(:definitions) { (endofline | (assignment >> endofline)).repeat }

  # ---- Match clause and whole program ----

  rule(:oneline_clause) do
    space? >> kMATCH >> space? >> pattern >> kEND >> endofline.maybe
  end

  rule(:single_line) { endofline | (space? >> pattern >> endofline) }

  rule(:multiline_clause) do
    space? >> kMATCH >> endofline >>
      single_line.repeat(1).as(:lines) >>
      space? >> kEND >> endofline.maybe
  end

  rule(:match_clause) { multiline_clause | oneline_clause }

  rule(:program) do
    definitions.as(:definitions) >> match_clause.as(:match) >> endofline.repeat
  end

  root(:program)
end
|
113
|
+
|
@@ -0,0 +1,180 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
abort "Require out of order" if ! defined? Regexador
|
4
|
+
|
5
|
+
# Turns the tree produced by Regexador::Parser into a tree of Node objects
# whose #to_s yields regular-expression source text.
class Regexador::Transform < Parslet::Transform
  # Base class of all tree nodes. Concrete node classes are built with
  # Node.make, which supplies the field accessors and the #to_s conversion.
  class Node
    # Build an anonymous subclass with one accessor per entry in +fields+.
    # The block, evaluated in the context of the instance, becomes #to_s.
    # Without a block the subclass keeps the inherited #to_s, which raises
    # NotImplementedError.
    def self.make(*fields, &block)
      ::Class.new(self) do
        fields.each {|field| attr_accessor field }
        define_method(:fields) { fields.dup }
        define_method(:to_s, &block) if block
      end
    end

    # Assign +values+ to the declared fields, in declaration order.
    def initialize(*values)
      fields.zip(values) {|f, v| send("#{f}=", v) }
    end

    def to_s
      raise NotImplementedError,
            "Please implement #to_s for #{short_name}."
    end

    # Lets nodes be used where an implicit String conversion is expected.
    def to_str
      to_s
    end

    # Class name without its namespace; falls back to "Node" for a class that
    # has not been assigned to a constant.
    def short_name
      (self.class.name || 'Node').split('::').last
    end

    def inspect
      data = fields.map {|f| "#{f}=#{send(f).inspect}" }.join(', ')
      short_name + "(" + data + ")"
    end
  end

  # Later: Remember escaping for chars (char, c1, c2, nchar, ...)

  XChar = Node.make(:char) do
    Regexp.escape(char)
  end

  CharRange    = Node.make(:c1, :c2)      { "[#@c1-#@c2]" }
  NegatedRange = Node.make(:nr1, :nr2)    { "[^#@nr1-#@nr2]" }
  NegatedChar  = Node.make(:nchar)        { "[^#@nchar]" }   # More like a range really
  POSIXClass   = Node.make(:pclass)       { "[[:#@pclass:]]" }
  CharClass    = Node.make(:char_class)   { "[#@char_class]" }
  NegatedClass = Node.make(:neg_class)    { "[^#@neg_class]" }

  # A predefined token; its regex text is looked up under the key "p<name>".
  Predefined = Node.make(:pre) do
    sym = "p#@pre".to_sym
    str = Regexador::Parser::Predef2Regex[sym]
    raise "#@pre is not handled yet" if str.nil?
    str
  end

  StringNode = Node.make(:string) { Regexp.escape(string.to_s) }
  Repeat1    = Node.make(:num1, :match_item)        { "(#@match_item){#@num1}" }
  Repeat2    = Node.make(:num1, :num2, :match_item) { "(#@match_item){#@num1,#@num2}" }
  Any        = Node.make(:match_item) { "(#@match_item)*" }
  Many       = Node.make(:match_item) { "(#@match_item)+" }
  Maybe      = Node.make(:match_item) { "(#@match_item)?" }
  Nocase     = Node.make(:match_item) { "((?i)#@match_item)" }

  # Each block below interpolates only the fields its node declares.
  FindWith    = Node.make(:findpat_ahead, :pospat)  { "((?=#@findpat_ahead#@pospat)#@findpat_ahead)" }
  FindWithout = Node.make(:findpat_ahead, :negpat)  { "((?!#@findpat_ahead#@negpat)#@findpat_ahead)" }
  WithFind    = Node.make(:pospat, :findpat_behind) { "((?<=#@pospat)#@findpat_behind)" }
  WithoutFind = Node.make(:negpat, :findpat_behind) { "((?<!#@negpat)#@findpat_behind)" }

  Within   = Node.make(:delim) { "(#@delim.*?#@delim)" }   # /x[^y]*?y/
  Escaping = Node.make(:delim) { "\\#@delim|[^#@delim]*?#@delim" }
  # escaping `"   # /"(\\"|[^"])*?"/

  Sequence    = Node.make(:elements) { elements.map(&:to_s).join }
  Alternation = Node.make(:elements) { '(' + elements.map(&:to_s).join('|') + ')' }

  Assignment = Node.make(:var, :rvalue) { "" }   # Doesn't actually translate directly.

  # A variable reference: expands to the node bound to that name. Yields the
  # empty string when the name is unbound or nothing has been stored yet.
  Usage = Node.make(:var) { (Assignment.bindings || {})[var.to_s].to_s }

  Program = Node.make(:definitions, :match) do
    # NOTE Since we're using to_s for conversion to regular expression,
    # debugging cannot be done using string interpolation, otherwise we
    # call things out of order just by debug-printing them!
    #
    # puts "In Program: #{match}"           # Don't do this
    # puts "In Program: #{match.inspect}"   # But this is OK
    definitions.each {|d| d.store }
    match.to_s
  end

  class Assignment < Node   # For clarity: Really already is-a Node
    class << self
      # Name => node table. It lives on the class itself, so it is shared by
      # every Assignment instance.
      attr_accessor :bindings
    end

    # Record this assignment in the bindings table.
    def store
      # puts "Storing #@var = #{@rvalue.inspect}"
      hash = self.class.bindings ||= {}

      hash[@var.to_s] = @rvalue          # Late binding
      # hash[@var.to_s] = @rvalue.to_s   # Early binding
      # Think about the difference...  :)
    end
  end

  Captured = Node.make(:cname, :pattern) { "(?<#@cname>#@pattern)" }
  Backref  = Node.make(:name) { "\\k<#@name>" }

  # Emits a "(name){0}" placeholder in the regex source.
  Parameter = Node.make(:param) { "(#{param}){0}" }

  PosAhead  = Node.make(:pla1, :pla2) { "(?=#@pla1#@pla2)#@pla1" }
  NegAhead  = Node.make(:nla1, :nla2) { "(?!#@nla1#@nla2)#@nla1" }
  PosBehind = Node.make(:plb1, :plb2) { "(?<=#@plb1)#@plb2" }
  NegBehind = Node.make(:nlb1, :nlb2) { "(?<!#@nlb1)#@nlb2" }

  # Actual transformation rules

  rule(:char => simple(:ch))      { XChar.new(ch) }
  rule(:unicode => simple(:hex4)) { StringNode.new("" << Integer("0x#{hex4}")) }

  rule(:string => simple(:string)) { StringNode.new(string) }
  # When the string is empty, parslet returns an empty array for lack of content.
  # Map that to the empty string node.
  rule(:string => sequence(:string)) { StringNode.new('') }

  rule(:c1 => simple(:c1), :c2 => simple(:c2)) { CharRange.new(c1, c2) }

  rule(:nr1 => simple(:nr1), :nr2 => simple(:nr2)) { NegatedRange.new(nr1, nr2) }
  rule(:nchar => simple(:nchar)) { NegatedChar.new(nchar) }   # Don't forget escaping

  rule(:pclass => simple(:pclass)) { POSIXClass.new(pclass) }

  rule(:char_class => simple(:char_class)) { CharClass.new(char_class) }
  rule(:neg_class => simple(:neg_class))   { NegatedClass.new(neg_class) }

  rule(:predef => simple(:content)) { Predefined.new(content) }

  rule(:num1 => simple(:num1), :match_item => simple(:match_item)) { Repeat1.new(num1, match_item) }

  rule(:num1 => simple(:num1), :num2 => simple(:num2), :match_item => simple(:match_item)) { Repeat2.new(num1, num2, match_item) }

  rule(:qualifier => 'any',      :match_item => simple(:match_item)) { Any.new(match_item) }
  rule(:qualifier => 'many',     :match_item => simple(:match_item)) { Many.new(match_item) }
  rule(:qualifier => 'maybe',    :match_item => simple(:match_item)) { Maybe.new(match_item) }
  rule(:qualifier => 'nocase',   :match_item => simple(:match_item)) { Nocase.new(match_item) }
  rule(:qualifier => 'within',   :match_item => simple(:match_item)) { Within.new(match_item) }
  rule(:qualifier => 'escaping', :match_item => simple(:match_item)) { Escaping.new(match_item) }

  rule(:findpat_ahead => simple(:pla1), :pospat => simple(:pla2)) { PosAhead.new(pla1, pla2) }
  rule(:findpat_ahead => simple(:nla1), :negpat => simple(:nla2)) { NegAhead.new(nla1, nla2) }
  rule(:pospat => simple(:plb1), :findpat_behind => simple(:plb2)) { PosBehind.new(plb1, plb2) }
  rule(:negpat => simple(:nlb1), :findpat_behind => simple(:nlb2)) { NegBehind.new(nlb1, nlb2) }

  # Uses the binding names, like every other rule in this class.
  rule(:var => simple(:var), :rvalue => simple(:rvalue)) { Assignment.new(var, rvalue) }

  rule(:param => simple(:param)) { Parameter.new(param) }

  rule(:alternation => simple(:pattern))        { pattern }
  rule(:alternation => sequence(:alternatives)) { Alternation.new(alternatives) }

  rule(:sequence => simple(:element))    { element }
  rule(:sequence => sequence(:elements)) { Sequence.new(elements) }

  # A series of statements on different lines is also a sequence.
  rule(:lines => sequence(:lines)) { Sequence.new(lines) }

  rule(:var => simple(:name)) { Usage.new(name) }

  rule(:definitions => sequence(:definitions), :match => simple(:match))   { Program.new(definitions, match) }
  rule(:definitions => sequence(:definitions), :match => sequence(:match)) { Program.new(definitions, match) }

  # An expression of the form '@variable'
  rule(:lhs => {:cvar => simple(:backref)}) { Backref.new(backref) }

  # An expression of the form '@variable = expr'
  rule(:lhs => {:cvar => simple(:cname)}, :rhs => simple(:pattern)) { Captured.new(cname, pattern) }
end
|
180
|
+
|
@@ -0,0 +1,174 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
require './spec/testing'
|
3
|
+
|
4
|
+
class Object
  # Spec helper: expects the receiver not to be equal to nil, using the
  # +should_not+ expectation the spec environment provides on every object.
  def succeeds
    expectation = should_not
    expectation == nil
  end
end
|
9
|
+
|
10
|
+
# Parser-level specs: each example feeds source text to one grammar rule (or
# to the whole parser) and expects a non-nil parse result.
describe Regexador do
  before(:all) do
    @parser = Regexador::Parser.new
    @pattern = @parser.pattern
  end

  describe "A special character" do
    it "can be matched correctly" do
      @parser.cSQUOTE.parse_with_debug("'").succeeds
      @parser.cHASH.parse('#').succeeds
      @parser.cNEWLINE.parse("\n").succeeds
      @parser.cEQUAL.parse('=').succeeds
    end
  end

  describe "An international character" do
    it "can follow a backtick" do   #
      @parser.char.parse_with_debug("`æ").succeeds
      ["`ß", "`ç", "`ö", "`ñ"].each do |source|
        @parser.char.parse(source).succeeds
      end
    end
  end

  describe "A Unicode codepoint expression" do
    it "can be matched" do
      @parser.codepoint.parse_with_debug("&1234").succeeds
      @parser.codepoint.parse('&beef').succeeds
    end
  end

  describe "A predefined token" do
    %w(BOS EOS START END).each do |token|
      describe token do
        it 'matches using pattern' do
          @parser.pattern.parse_with_debug(token).succeeds
        end
      end
    end
  end

  describe "An assignment" do
    it "can be parsed" do
      plain_cases = [
        "a = 5",
        "a= 5",
        "a =5",
        "a=5",
        "myvar = 'xyz'",
        'var2 = "hello"',
        'this_var = `x-`z'
      ]
      plain_cases.each do |source|
        @parser.assignment.parse(source).succeeds
      end
      @parser.assignment.parse_with_debug('pat = maybe many `x-`z').succeeds
    end
  end

  describe "A keyword used as a variable name" do
    it "will not parse" do
      @parser.assignment.should_not parse("end = 'hello'")
      # @parser.assignment.parse("endx = 'hello'")
    end
  end

  describe "A definition section" do
    it "can be parsed" do
      short_defs = "a = 5\nstr = \"hello\"\n"
      @parser.definitions.parse_with_debug(short_defs).succeeds

      # Definitions mixed with comments and an empty line.
      long_defs = [
        'a = 5',
        '# comment...',
        'pat = maybe many `a-`c',
        '# empty line follows:',
        '',
        'str = "hello"',
        '# another comment...'
      ].join("\n") + "\n"
      @parser.definitions.parse_with_debug(long_defs).succeeds
    end
  end

  describe "A capture variable" do
    it "can be parsed" do
      source = "@myvar"
      @parser.capture_var.parse(source).succeeds
    end
  end

  describe "A captured pattern" do
    let(:prog) { "@myvar = maybe 'abc'" }

    it "can be parsed (#capture)" do
      @parser.capture.parse(prog).succeeds
    end
    it "can be parsed (#program)" do
      @parser.parse("match #{prog} end").succeeds
    end
  end
  describe "A back reference" do
    let(:prog) { '@myvar' }

    it 'can be parsed (#capture)' do
      @parser.capture.parse(prog).succeeds
    end
    it 'can be parsed' do
      @parser.parse("match #{prog} end").succeeds
    end
  end


  describe "A one-line match clause" do
    it "can be parsed" do
      clause = "match `a~`x end" + "\n"
      @parser.match_clause.parse_with_debug(clause).succeeds
    end
  end

  describe "A multiline match clause" do
    it "can be parsed" do
      clause = [
        'match',
        '`< "tag" WB',
        'any ~`>',
        '# blah blah blah',
        '"</" "tag" `>',
        'end'
      ].join("\n") + "\n"
      @parser.multiline_clause.parse_with_debug(clause).succeeds
    end
  end

  describe "An entire one-line program" do
    it "can be parsed" do
      source = "match `a-`f end"
      @parser.parse_with_debug(source).succeeds
    end
  end


  describe "An entire program" do
    it "can be parsed" do
      dotted_quad = [
        'dot = "."',
        'num = "25" D5 | `2 D4 D | maybe D1 1,2*D',
        'match WB num dot num dot num dot num WB end'
      ].join("\n") + "\n"
      @parser.program.parse_with_debug(dotted_quad).succeeds

      card_numbers = [
        '# Warning: This one likely has errors!',
        '',
        'visa = `4 12*D maybe 3*D',
        'mc = `5 D5 14*D',
        "amex = `3 '47' 13*D",
        "diners = `3 (`0 D5 | '68' D) 11*D",
        'discover = `6 ("011" | `5 2*D) 12*D',
        'jcb = ("2131"|"1800"|"35" 3*D) 11*D',
        '',
        'match visa | mc | amex | diners | discover | jcb end'
      ].join("\n") + "\n"
      @parser.program.parse_with_debug(card_numbers).succeeds
    end
  end
end
|
174
|
+
|