rley 0.5.08 → 0.5.09
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/examples/general/SRL/lib/ast_builder.rb +74 -78
- data/examples/general/SRL/lib/grammar.rb +11 -3
- data/examples/general/SRL/lib/regex/abstract_method.rb +35 -0
- data/examples/general/SRL/lib/regex/atomic_expression.rb +21 -0
- data/examples/general/SRL/lib/regex/char_class.rb +34 -0
- data/examples/general/SRL/lib/regex/char_range.rb +50 -0
- data/examples/general/SRL/lib/regex/character.rb +195 -0
- data/examples/general/SRL/lib/regex/compound_expression.rb +60 -0
- data/examples/general/SRL/lib/regex/expression.rb +42 -0
- data/examples/general/SRL/lib/regex/monadic_expression.rb +31 -0
- data/examples/general/SRL/lib/regex/polyadic_expression.rb +64 -0
- data/examples/general/SRL/lib/regex/quantifiable.rb +28 -0
- data/examples/general/SRL/lib/regex/repetition.rb +31 -0
- data/examples/general/SRL/lib/regex_repr.rb +5 -1
- data/examples/general/SRL/lib/tokenizer.rb +8 -5
- data/examples/general/SRL/spec/integration_spec.rb +64 -41
- data/examples/general/SRL/spec/regex/character_spec.rb +159 -0
- data/examples/general/SRL/spec/tokenizer_spec.rb +18 -4
- data/examples/general/SRL/srl_demo.rb +14 -4
- data/lib/rley/constants.rb +1 -1
- metadata +14 -3
- data/examples/general/SRL/lib/srl_demo.rb +0 -67
@@ -0,0 +1,60 @@
|
|
1
|
+
# File: compound_expression.rb
|
2
|
+
|
3
|
+
require_relative "expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. An element that is part of a regular expression &
|
8
|
+
# that has its own child sub-expressions.
|
9
|
+
class CompoundExpression < Expression
|
10
|
+
|
11
|
+
public
|
12
|
+
# Redefined method. Return false since it may have one or more children.
|
13
|
+
def atomic?
|
14
|
+
return false
|
15
|
+
end
|
16
|
+
|
17
|
+
=begin
|
18
|
+
# Build a depth-first in-order children visitor.
|
19
|
+
# The visitor is implemented as an Enumerator.
|
20
|
+
def df_visitor()
|
21
|
+
root = children # The visit will start from the children of this object
|
22
|
+
|
23
|
+
visitor = Enumerator.new do |result| # result is a Yielder
|
24
|
+
# Initialization part: will run once
|
25
|
+
visit_stack = [ root ] # The LIFO queue of nodes to visit
|
26
|
+
|
27
|
+
begin # Traversal part (as a loop)
|
28
|
+
top = visit_stack.pop()
|
29
|
+
if top.kind_of?(Array)
|
30
|
+
if top.empty?
|
31
|
+
next
|
32
|
+
else
|
33
|
+
currChild = top.pop()
|
34
|
+
visit_stack.push top
|
35
|
+
end
|
36
|
+
else
|
37
|
+
currChild = top
|
38
|
+
end
|
39
|
+
|
40
|
+
result << currChild # Return the visited child
|
41
|
+
|
42
|
+
unless currChild.atomic?
|
43
|
+
children_to_enqueue = currChild.children.reverse() # in-order traversal implies LIFO queue
|
44
|
+
visit_stack.push(children_to_enqueue)
|
45
|
+
end
|
46
|
+
end until visit_stack.empty?
|
47
|
+
end
|
48
|
+
end
|
49
|
+
=end
|
50
|
+
|
51
|
+
protected
|
52
|
+
# Abstract method. Return the text representation of the child (if any)
|
53
|
+
def all_child_text() abstract_method()
|
54
|
+
end
|
55
|
+
|
56
|
+
end # class
|
57
|
+
|
58
|
+
end # module
|
59
|
+
|
60
|
+
# End of file
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# File: expression.rb
|
2
|
+
|
3
|
+
require_relative 'abstract_method'
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. The generalization of any valid regular (sub)expression.
|
8
|
+
class Expression
|
9
|
+
def initialize()
|
10
|
+
end
|
11
|
+
|
12
|
+
public
|
13
|
+
# Abstract method. Return true iff the expression is atomic (= may not have any child).
|
14
|
+
def atomic? abstract_method
|
15
|
+
end
|
16
|
+
|
17
|
+
# Abstract method. Return the number of values that match this expression.
|
18
|
+
# [theParentOptions] a Hash of matching options. They are overridden by options with the same name
|
19
|
+
# that are bound to this object.
|
20
|
+
def cardinality(theParentOptions) abstract_method
|
21
|
+
end
|
22
|
+
|
23
|
+
protected
|
24
|
+
# Determine the matching options to apply to this object, given the options coming from the parent
|
25
|
+
# and options that are local to this object. Local options take precedence.
|
26
|
+
# [theParentOptions] a Hash of matching options. They are overridden by options with same name
|
27
|
+
# that are bound to this object.
|
28
|
+
def options(theParentOptions)
|
29
|
+
resulting_options = theParentOptions.merge(@local_options)
|
30
|
+
return resulting_options
|
31
|
+
end
|
32
|
+
|
33
|
+
# Abstract conversion method.
|
34
|
+
# Purpose: Return the String representation of the expression.
|
35
|
+
def to_str() abstract_method
|
36
|
+
end
|
37
|
+
|
38
|
+
end # class
|
39
|
+
|
40
|
+
end # module
|
41
|
+
|
42
|
+
# End of file
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# File: monadic_expression.rb
|
2
|
+
|
3
|
+
require_relative "compound_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. An element that is part of a regular expression &
|
8
|
+
# that can have up to one child sub-expression.
|
9
|
+
class MonadicExpression < CompoundExpression
|
10
|
+
# The (optional) child sub-expression
|
11
|
+
attr_reader(:child)
|
12
|
+
|
13
|
+
# Constructor.
|
14
|
+
def initialize(theChild)
|
15
|
+
super()
|
16
|
+
@child = theChild
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# Return the text representation of the child (if any)
|
21
|
+
def all_child_text()
|
22
|
+
result = child.nil? ? '' : child.to_str()
|
23
|
+
|
24
|
+
return result
|
25
|
+
end
|
26
|
+
|
27
|
+
end # class
|
28
|
+
|
29
|
+
end # module
|
30
|
+
|
31
|
+
# End of file
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# File: polyadic_expression.rb
|
2
|
+
|
3
|
+
require_relative "compound_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. An element that is part of a regular expression &
|
8
|
+
# that has its own child sub-expressions.
|
9
|
+
class PolyadicExpression < CompoundExpression
|
10
|
+
# The aggregation of child elements
|
11
|
+
attr_reader(:children)
|
12
|
+
|
13
|
+
# Constructor.
|
14
|
+
def initialize(theChildren)
|
15
|
+
super()
|
16
|
+
@children = theChildren
|
17
|
+
end
|
18
|
+
|
19
|
+
public
|
20
|
+
# Append the given child to the list of children.
|
21
|
+
# TODO: assess whether to defer to a subclass NAryExpression
|
22
|
+
def <<(aChild)
|
23
|
+
@children << aChild
|
24
|
+
|
25
|
+
return self
|
26
|
+
end
|
27
|
+
|
28
|
+
# Build a depth-first in-order children visitor.
|
29
|
+
# The visitor is implemented as an Enumerator.
|
30
|
+
def df_visitor()
|
31
|
+
root = children # The visit will start from the children of this object
|
32
|
+
|
33
|
+
visitor = Enumerator.new do |result| # result is a Yielder
|
34
|
+
# Initialization part: will run once
|
35
|
+
visit_stack = [ root ] # The LIFO queue of nodes to visit
|
36
|
+
|
37
|
+
begin # Traversal part (as a loop)
|
38
|
+
top = visit_stack.pop()
|
39
|
+
if top.kind_of?(Array)
|
40
|
+
if top.empty?
|
41
|
+
next
|
42
|
+
else
|
43
|
+
currChild = top.pop()
|
44
|
+
visit_stack.push top
|
45
|
+
end
|
46
|
+
else
|
47
|
+
currChild = top
|
48
|
+
end
|
49
|
+
|
50
|
+
result << currChild # Return the visited child
|
51
|
+
|
52
|
+
unless currChild.atomic?
|
53
|
+
children_to_enqueue = currChild.children.reverse() # in-order traversal implies LIFO queue
|
54
|
+
visit_stack.push(children_to_enqueue)
|
55
|
+
end
|
56
|
+
end until visit_stack.empty?
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
end # class
|
61
|
+
|
62
|
+
end # module
|
63
|
+
|
64
|
+
# End of file
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# File: quantifiable.rb
|
2
|
+
|
3
|
+
require_relative 'multiplicity'
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
|
8
|
+
module Quantifiable
|
9
|
+
|
10
|
+
public
|
11
|
+
# Return true iff a quantifier has been assigned to this object.
|
12
|
+
def quantified?
|
13
|
+
return @quantifier.nil? ? false :true
|
14
|
+
end
|
15
|
+
|
16
|
+
def quantifier
|
17
|
+
@quantifier
|
18
|
+
end
|
19
|
+
|
20
|
+
def quantifier=(aQuantifier)
|
21
|
+
@quantifier = aQuantifier
|
22
|
+
end
|
23
|
+
|
24
|
+
end # class
|
25
|
+
|
26
|
+
end # module
|
27
|
+
|
28
|
+
# End of file
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# File: repetition.rb
|
2
|
+
|
3
|
+
require_relative "monadic_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. A unary matching operator.
|
8
|
+
# It succeeds when the specified repetition of the child expression succeeds in matching
|
9
|
+
# the subject text in the same serial arrangement
|
10
|
+
class Repetition < MonadicExpression
|
11
|
+
attr_reader(:multiplicity)
|
12
|
+
|
13
|
+
# Constructor.
|
14
|
+
def initialize(childExpressionToRepeat, aMultiplicity)
|
15
|
+
super(childExpressionToRepeat)
|
16
|
+
@multiplicity = aMultiplicity
|
17
|
+
end
|
18
|
+
|
19
|
+
public
|
20
|
+
# Conversion method re-definition.
|
21
|
+
# Purpose: Return the String representation of the child expression followed by its multiplicity.
|
22
|
+
def to_str()
|
23
|
+
result = all_child_text() + multiplicity.to_str()
|
24
|
+
return result
|
25
|
+
end
|
26
|
+
|
27
|
+
end # class
|
28
|
+
|
29
|
+
end # module
|
30
|
+
|
31
|
+
# End of file
|
@@ -29,14 +29,18 @@ module SRL
|
|
29
29
|
AT
|
30
30
|
BETWEEN
|
31
31
|
EXACTLY
|
32
|
+
FROM
|
32
33
|
LEAST
|
34
|
+
LETTER
|
33
35
|
MORE
|
34
36
|
NEVER
|
35
37
|
ONCE
|
36
38
|
OPTIONAL
|
37
39
|
OR
|
38
40
|
TIMES
|
41
|
+
TO
|
39
42
|
TWICE
|
43
|
+
UPPERCASE
|
40
44
|
].map { |x| [x, x] } .to_h
|
41
45
|
|
42
46
|
class ScanError < StandardError; end
|
@@ -67,18 +71,17 @@ module SRL
|
|
67
71
|
token = nil
|
68
72
|
|
69
73
|
if '(),'.include? curr_ch
|
70
|
-
#
|
74
|
+
# Delimiters, separators => single character token
|
71
75
|
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
72
76
|
elsif (lexeme = scanner.scan(/[0-9]{2,}/))
|
73
77
|
token = build_token('INTEGER', lexeme) # An integer has two or more digits
|
74
78
|
elsif (lexeme = scanner.scan(/[0-9]/))
|
75
|
-
token = build_token('
|
79
|
+
token = build_token('DIGIT_LIT', lexeme)
|
76
80
|
elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
|
77
81
|
token = build_token(@@keywords[lexeme.upcase], lexeme)
|
78
82
|
# TODO: handle case unknown identifier
|
79
|
-
elsif (lexeme = scanner.scan(
|
80
|
-
|
81
|
-
token = build_token('CHAR', lexeme)
|
83
|
+
elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
|
84
|
+
token = build_token('LETTER_LIT', lexeme)
|
82
85
|
else # Unknown token
|
83
86
|
erroneous = curr_ch.nil? ? '' : curr_ch
|
84
87
|
sequel = scanner.scan(/.{1,20}/)
|
@@ -15,89 +15,112 @@ describe 'Integration tests:' do
|
|
15
15
|
regexp = tree.root
|
16
16
|
end
|
17
17
|
|
18
|
+
context 'Parsing character ranges:' do
|
19
|
+
it "should parse 'letter from ... to ...' syntax" do
|
20
|
+
result = parse('letter from a to f')
|
21
|
+
expect(result).to be_success
|
22
|
+
|
23
|
+
regexp = regexp_repr(result)
|
24
|
+
expect(regexp.to_str).to eq('[a-f]')
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should parse 'uppercase letter from ... to ...' syntax" do
|
28
|
+
result = parse('UPPERCASE letter from A to F')
|
29
|
+
expect(result).to be_success
|
30
|
+
|
31
|
+
regexp = regexp_repr(result)
|
32
|
+
expect(regexp.to_str).to eq('[A-F]')
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should parse 'letter' syntax" do
|
36
|
+
result = parse('letter')
|
37
|
+
expect(result).to be_success
|
38
|
+
|
39
|
+
regexp = regexp_repr(result)
|
40
|
+
expect(regexp.to_str).to eq('[a-z]')
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should parse 'uppercase letter' syntax" do
|
44
|
+
result = parse('uppercase letter')
|
45
|
+
expect(result).to be_success
|
46
|
+
|
47
|
+
regexp = regexp_repr(result)
|
48
|
+
expect(regexp.to_str).to eq('[A-Z]')
|
49
|
+
end
|
50
|
+
|
51
|
+
end # context
|
52
|
+
|
18
53
|
context 'Parsing quantifiers:' do
|
54
|
+
let(:prefix) { 'letter from p to t ' }
|
55
|
+
|
19
56
|
it "should parse 'once' syntax" do
|
20
|
-
result = parse('once')
|
57
|
+
result = parse(prefix + 'once')
|
21
58
|
expect(result).to be_success
|
22
59
|
|
23
60
|
regexp = regexp_repr(result)
|
24
|
-
expect(regexp.to_str).to eq('{1}')
|
61
|
+
expect(regexp.to_str).to eq('[p-t]{1}')
|
25
62
|
end
|
26
63
|
|
27
64
|
it "should parse 'twice' syntax" do
|
28
|
-
result = parse('twice')
|
65
|
+
result = parse(prefix + 'twice')
|
29
66
|
expect(result).to be_success
|
30
|
-
|
67
|
+
|
31
68
|
regexp = regexp_repr(result)
|
32
|
-
expect(regexp.to_str).to eq('{2}')
|
69
|
+
expect(regexp.to_str).to eq('[p-t]{2}')
|
33
70
|
end
|
34
71
|
|
35
72
|
it "should parse 'optional' syntax" do
|
36
|
-
result = parse('optional')
|
73
|
+
result = parse(prefix + 'optional')
|
37
74
|
expect(result).to be_success
|
38
|
-
|
75
|
+
|
39
76
|
regexp = regexp_repr(result)
|
40
|
-
expect(regexp.to_str).to eq('?')
|
77
|
+
expect(regexp.to_str).to eq('[p-t]?')
|
41
78
|
end
|
42
79
|
|
43
80
|
it "should parse 'exactly ... times' syntax" do
|
44
|
-
result = parse('exactly 4 times')
|
81
|
+
result = parse('letter from a to f exactly 4 times')
|
45
82
|
expect(result).to be_success
|
46
|
-
|
83
|
+
|
47
84
|
regexp = regexp_repr(result)
|
48
|
-
expect(regexp.to_str).to eq('{4}')
|
85
|
+
expect(regexp.to_str).to eq('[a-f]{4}')
|
49
86
|
end
|
50
87
|
|
51
88
|
it "should parse 'between ... and ... times' syntax" do
|
52
|
-
result = parse('between 2 and 4 times')
|
89
|
+
result = parse(prefix + 'between 2 and 4 times')
|
53
90
|
expect(result).to be_success
|
54
91
|
|
55
92
|
# Dropping 'times' keyword is shorter syntax
|
56
|
-
expect(parse('between 2 and 4')).to be_success
|
57
|
-
|
93
|
+
expect(parse(prefix + 'between 2 and 4')).to be_success
|
94
|
+
|
58
95
|
regexp = regexp_repr(result)
|
59
|
-
expect(regexp.to_str).to eq('{2,
|
96
|
+
expect(regexp.to_str).to eq('[p-t]{2,4}')
|
60
97
|
end
|
61
98
|
|
99
|
+
|
62
100
|
it "should parse 'once or more' syntax" do
|
63
|
-
result = parse('once or more')
|
101
|
+
result = parse(prefix + 'once or more')
|
64
102
|
expect(result).to be_success
|
103
|
+
|
104
|
+
regexp = regexp_repr(result)
|
105
|
+
expect(regexp.to_str).to eq('[p-t]+')
|
65
106
|
end
|
66
107
|
|
67
108
|
it "should parse 'never or more' syntax" do
|
68
|
-
result = parse('never or more')
|
109
|
+
result = parse(prefix + 'never or more')
|
69
110
|
expect(result).to be_success
|
111
|
+
|
112
|
+
regexp = regexp_repr(result)
|
113
|
+
expect(regexp.to_str).to eq('[p-t]*')
|
70
114
|
end
|
71
115
|
|
72
116
|
it "should parse 'at least ... times' syntax" do
|
73
|
-
result = parse('at least 10 times')
|
117
|
+
result = parse(prefix + 'at least 10 times')
|
74
118
|
expect(result).to be_success
|
75
|
-
|
119
|
+
|
76
120
|
regexp = regexp_repr(result)
|
77
|
-
expect(regexp.to_str).to eq('{10,}')
|
121
|
+
expect(regexp.to_str).to eq('[p-t]{10,}')
|
78
122
|
end
|
79
|
-
|
80
123
|
end # context
|
81
|
-
|
82
124
|
end # describe
|
83
125
|
|
84
126
|
|
85
|
-
=begin
|
86
|
-
|
87
|
-
unless result.success?
|
88
|
-
# Stop if the parse failed...
|
89
|
-
puts "Parsing of '#{ARGV[0]}' failed"
|
90
|
-
puts "Reason: #{result.failure_reason.message}"
|
91
|
-
exit(1)
|
92
|
-
end
|
93
|
-
|
94
|
-
|
95
|
-
# Generate a concrete syntax parse tree from the parse result
|
96
|
-
cst_ptree = result.parse_tree
|
97
|
-
print_tree('Concrete Syntax Tree (CST)', cst_ptree)
|
98
|
-
|
99
|
-
# Generate an abstract syntax parse tree from the parse result
|
100
|
-
tree_builder = ASTBuilder
|
101
|
-
ast_ptree = result.parse_tree(tree_builder)
|
102
|
-
=end
|
103
|
-
|