rley 0.5.08 → 0.5.09
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/examples/general/SRL/lib/ast_builder.rb +74 -78
- data/examples/general/SRL/lib/grammar.rb +11 -3
- data/examples/general/SRL/lib/regex/abstract_method.rb +35 -0
- data/examples/general/SRL/lib/regex/atomic_expression.rb +21 -0
- data/examples/general/SRL/lib/regex/char_class.rb +34 -0
- data/examples/general/SRL/lib/regex/char_range.rb +50 -0
- data/examples/general/SRL/lib/regex/character.rb +195 -0
- data/examples/general/SRL/lib/regex/compound_expression.rb +60 -0
- data/examples/general/SRL/lib/regex/expression.rb +42 -0
- data/examples/general/SRL/lib/regex/monadic_expression.rb +31 -0
- data/examples/general/SRL/lib/regex/polyadic_expression.rb +64 -0
- data/examples/general/SRL/lib/regex/quantifiable.rb +28 -0
- data/examples/general/SRL/lib/regex/repetition.rb +31 -0
- data/examples/general/SRL/lib/regex_repr.rb +5 -1
- data/examples/general/SRL/lib/tokenizer.rb +8 -5
- data/examples/general/SRL/spec/integration_spec.rb +64 -41
- data/examples/general/SRL/spec/regex/character_spec.rb +159 -0
- data/examples/general/SRL/spec/tokenizer_spec.rb +18 -4
- data/examples/general/SRL/srl_demo.rb +14 -4
- data/lib/rley/constants.rb +1 -1
- metadata +14 -3
- data/examples/general/SRL/lib/srl_demo.rb +0 -67
@@ -0,0 +1,60 @@
|
|
1
|
+
# File: compound_expression.rb
|
2
|
+
|
3
|
+
require_relative "expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. An element that is part of a regular expression &
|
8
|
+
# that has its own child sub-expressions.
|
9
|
+
class CompoundExpression < Expression
  # Redefined method. A compound expression may hold one or more
  # children, hence it is never atomic.
  def atomic?
    false
  end

=begin
  # Build a depth-first in-order children visitor.
  # The visitor is implemented as an Enumerator.
  def df_visitor()
    root = children # The visit will start from the children of this object

    visitor = Enumerator.new do |result| # result is a Yielder
      # Initialization part: will run once
      visit_stack = [ root ] # The LIFO queue of nodes to visit

      begin # Traversal part (as a loop)
        top = visit_stack.pop()
        if top.kind_of?(Array)
          if top.empty?
            next
          else
            currChild = top.pop()
            visit_stack.push top
          end
        else
          currChild = top
        end

        result << currChild # Return the visited child

        unless currChild.atomic?
          children_to_enqueue = currChild.children.reverse() # in-order traversal implies LIFO queue
          visit_stack.push(children_to_enqueue)
        end
      end until visit_stack.empty?
    end
  end
=end

  protected

  # Abstract method. Subclasses are expected to return the text
  # representation of their child(ren), if any.
  def all_child_text
    abstract_method
  end
end # class
|
57
|
+
|
58
|
+
end # module
|
59
|
+
|
60
|
+
# End of file
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# File: expression.rb
|
2
|
+
|
3
|
+
require_relative 'abstract_method'
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. The generalization of any valid regular (sub)expression.
|
8
|
+
class Expression
  # Constructor. Takes no argument and sets up no state of its own.
  def initialize
  end

  # Abstract method. Return true iff the expression is atomic
  # (= may not have any child).
  # The call to abstract_method must live in the method body: written on
  # the `def` line without parentheses, Ruby would parse it as a required
  # parameter of atomic? instead of a call.
  def atomic?
    abstract_method
  end

  # Abstract method. Return the number of values that match this expression.
  # [theParentOptions] a Hash of matching options. They are overridden by
  # options with same name that are bound to this object.
  def cardinality(theParentOptions)
    abstract_method
  end

  protected

  # Determine the matching options to apply to this object, given the
  # options coming from the parent and options that are local to this
  # object. Local options take precedence.
  # [theParentOptions] a Hash of matching options. They are overridden by
  # options with same name that are bound to this object.
  # Returns a new Hash; theParentOptions itself is not modified.
  def options(theParentOptions)
    # @local_options is not set by this class; treat "never set" as
    # "no local options" rather than failing on merge(nil).
    theParentOptions.merge(@local_options || {})
  end

  # Abstract conversion method.
  # Purpose: Return the String representation of the expression.
  def to_str
    abstract_method
  end
end # class
|
39
|
+
|
40
|
+
end # module
|
41
|
+
|
42
|
+
# End of file
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# File: monadic_expression.rb
|
2
|
+
|
3
|
+
require_relative "compound_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. An element that is part of a regular expression &
|
8
|
+
# that can have up to one child sub-expression.
|
9
|
+
class MonadicExpression < CompoundExpression
  # The (optional) child sub-expression
  attr_reader :child

  # Constructor.
  # [theChild] the sub-expression held by this object; nil means "no child".
  def initialize(theChild)
    super()
    @child = theChild
  end

  protected

  # Text representation of the child, or an empty String when
  # there is no child.
  def all_child_text
    return '' if child.nil?

    child.to_str
  end
end # class
|
28
|
+
|
29
|
+
end # module
|
30
|
+
|
31
|
+
# End of file
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# File: polyadic_expression.rb
|
2
|
+
|
3
|
+
require_relative "compound_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. An element that is part of a regular expression &
|
8
|
+
# that has its own child sub-expressions.
|
9
|
+
class PolyadicExpression < CompoundExpression
  # The aggregation of child elements
  attr_reader :children

  # Constructor.
  # [theChildren] the Array of child sub-expressions. It is stored as-is
  # (no copy is made).
  def initialize(theChildren)
    super()
    @children = theChildren
  end

  # Append the given child to the list of children.
  # Returns self, so that calls can be chained.
  # TODO: assess whether to defer to a subclass NAryExpression
  def <<(aChild)
    @children << aChild
    self
  end

  # Build a depth-first children visitor.
  # The visitor is implemented as an Enumerator that yields every
  # descendant of this object (the object itself is not yielded).
  # Enumerating the visitor leaves the children list of this object intact.
  # NOTE(review): direct children are taken from the end of the list,
  # whereas the children of a nested compound node are reversed before
  # being stacked and thus come out first-to-last. Confirm which order is
  # intended for the top level.
  def df_visitor
    root = children # The visit will start from the children of this object

    Enumerator.new do |result| # result is a Yielder
      # Initialization part: runs at the start of each enumeration.
      # The stack consumes its arrays with pop, so work on a copy of the
      # children list instead of the list owned by this object.
      visit_stack = [root.dup] # The LIFO queue of nodes to visit

      # Traversal part. The stack always starts with one entry, so the
      # loop body runs at least once.
      until visit_stack.empty?
        top = visit_stack.pop
        if top.kind_of?(Array)
          next if top.empty? # Exhausted list of siblings: drop it

          currChild = top.pop
          visit_stack.push top # Remaining siblings are visited later
        else
          currChild = top
        end

        result << currChild # Return the visited child

        unless currChild.atomic?
          # Reversed copy: pop will then deliver the children in list order
          visit_stack.push(currChild.children.reverse)
        end
      end
    end
  end
end # class
|
61
|
+
|
62
|
+
end # module
|
63
|
+
|
64
|
+
# End of file
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# File: quantifiable.rb
|
2
|
+
|
3
|
+
require_relative 'multiplicity'
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
|
8
|
+
# Mix-in that lets the including object carry a quantifier.
module Quantifiable
  # The quantifier bound to this object (nil when none was assigned).
  attr_accessor :quantifier

  # Return true iff a quantifier was assigned to this object,
  # that is, when the stored quantifier is not nil.
  def quantified?
    !@quantifier.nil?
  end
end # module Quantifiable
|
25
|
+
|
26
|
+
end # module
|
27
|
+
|
28
|
+
# End of file
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# File: repetition.rb
|
2
|
+
|
3
|
+
require_relative "monadic_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. A unary matching operator.
|
8
|
+
# It succeeds when the specified repetition of the child expression succeeds to match
|
9
|
+
# the subject text in the same serial arrangement
|
10
|
+
class Repetition < MonadicExpression
  # The multiplicity object given at construction time
  attr_reader :multiplicity

  # Constructor.
  # [childExpressionToRepeat] the sub-expression, passed to the superclass.
  # [aMultiplicity] the object whose to_str text is appended after the
  # text of the child.
  def initialize(childExpressionToRepeat, aMultiplicity)
    super(childExpressionToRepeat)
    @multiplicity = aMultiplicity
  end

  # Conversion method re-definition.
  # Purpose: Return the String representation of the repetition, that is,
  # the text of the child followed by the text of the multiplicity.
  def to_str
    all_child_text + multiplicity.to_str
  end
end # class
|
28
|
+
|
29
|
+
end # module
|
30
|
+
|
31
|
+
# End of file
|
@@ -29,14 +29,18 @@ module SRL
|
|
29
29
|
AT
|
30
30
|
BETWEEN
|
31
31
|
EXACTLY
|
32
|
+
FROM
|
32
33
|
LEAST
|
34
|
+
LETTER
|
33
35
|
MORE
|
34
36
|
NEVER
|
35
37
|
ONCE
|
36
38
|
OPTIONAL
|
37
39
|
OR
|
38
40
|
TIMES
|
41
|
+
TO
|
39
42
|
TWICE
|
43
|
+
UPPERCASE
|
40
44
|
].map { |x| [x, x] } .to_h
|
41
45
|
|
42
46
|
class ScanError < StandardError; end
|
@@ -67,18 +71,17 @@ module SRL
|
|
67
71
|
token = nil
|
68
72
|
|
69
73
|
if '(),'.include? curr_ch
|
70
|
-
#
|
74
|
+
# Delimiters, separators => single character token
|
71
75
|
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
72
76
|
elsif (lexeme = scanner.scan(/[0-9]{2,}/))
|
73
77
|
token = build_token('INTEGER', lexeme) # An integer has two or more digits
|
74
78
|
elsif (lexeme = scanner.scan(/[0-9]/))
|
75
|
-
token = build_token('
|
79
|
+
token = build_token('DIGIT_LIT', lexeme)
|
76
80
|
elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
|
77
81
|
token = build_token(@@keywords[lexeme.upcase], lexeme)
|
78
82
|
# TODO: handle case unknown identifier
|
79
|
-
elsif (lexeme = scanner.scan(
|
80
|
-
|
81
|
-
token = build_token('CHAR', lexeme)
|
83
|
+
elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
|
84
|
+
token = build_token('LETTER_LIT', lexeme)
|
82
85
|
else # Unknown token
|
83
86
|
erroneous = curr_ch.nil? ? '' : curr_ch
|
84
87
|
sequel = scanner.scan(/.{1,20}/)
|
@@ -15,89 +15,112 @@ describe 'Integration tests:' do
|
|
15
15
|
regexp = tree.root
|
16
16
|
end
|
17
17
|
|
18
|
+
context 'Parsing character ranges:' do
|
19
|
+
it "should parse 'letter from ... to ...' syntax" do
|
20
|
+
result = parse('letter from a to f')
|
21
|
+
expect(result).to be_success
|
22
|
+
|
23
|
+
regexp = regexp_repr(result)
|
24
|
+
expect(regexp.to_str).to eq('[a-f]')
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should parse 'uppercase letter from ... to ...' syntax" do
|
28
|
+
result = parse('UPPERCASE letter from A to F')
|
29
|
+
expect(result).to be_success
|
30
|
+
|
31
|
+
regexp = regexp_repr(result)
|
32
|
+
expect(regexp.to_str).to eq('[A-F]')
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should parse 'letter' syntax" do
|
36
|
+
result = parse('letter')
|
37
|
+
expect(result).to be_success
|
38
|
+
|
39
|
+
regexp = regexp_repr(result)
|
40
|
+
expect(regexp.to_str).to eq('[a-z]')
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should parse 'uppercase letter' syntax" do
|
44
|
+
result = parse('uppercase letter')
|
45
|
+
expect(result).to be_success
|
46
|
+
|
47
|
+
regexp = regexp_repr(result)
|
48
|
+
expect(regexp.to_str).to eq('[A-Z]')
|
49
|
+
end
|
50
|
+
|
51
|
+
end # context
|
52
|
+
|
18
53
|
context 'Parsing quantifiers:' do
|
54
|
+
let(:prefix) { 'letter from p to t ' }
|
55
|
+
|
19
56
|
it "should parse 'once' syntax" do
|
20
|
-
result = parse('once')
|
57
|
+
result = parse(prefix + 'once')
|
21
58
|
expect(result).to be_success
|
22
59
|
|
23
60
|
regexp = regexp_repr(result)
|
24
|
-
expect(regexp.to_str).to eq('{1}')
|
61
|
+
expect(regexp.to_str).to eq('[p-t]{1}')
|
25
62
|
end
|
26
63
|
|
27
64
|
it "should parse 'twice' syntax" do
|
28
|
-
result = parse('twice')
|
65
|
+
result = parse(prefix + 'twice')
|
29
66
|
expect(result).to be_success
|
30
|
-
|
67
|
+
|
31
68
|
regexp = regexp_repr(result)
|
32
|
-
expect(regexp.to_str).to eq('{2}')
|
69
|
+
expect(regexp.to_str).to eq('[p-t]{2}')
|
33
70
|
end
|
34
71
|
|
35
72
|
it "should parse 'optional' syntax" do
|
36
|
-
result = parse('optional')
|
73
|
+
result = parse(prefix + 'optional')
|
37
74
|
expect(result).to be_success
|
38
|
-
|
75
|
+
|
39
76
|
regexp = regexp_repr(result)
|
40
|
-
expect(regexp.to_str).to eq('?')
|
77
|
+
expect(regexp.to_str).to eq('[p-t]?')
|
41
78
|
end
|
42
79
|
|
43
80
|
it "should parse 'exactly ... times' syntax" do
|
44
|
-
result = parse('exactly 4 times')
|
81
|
+
result = parse('letter from a to f exactly 4 times')
|
45
82
|
expect(result).to be_success
|
46
|
-
|
83
|
+
|
47
84
|
regexp = regexp_repr(result)
|
48
|
-
expect(regexp.to_str).to eq('{4}')
|
85
|
+
expect(regexp.to_str).to eq('[a-f]{4}')
|
49
86
|
end
|
50
87
|
|
51
88
|
it "should parse 'between ... and ... times' syntax" do
|
52
|
-
result = parse('between 2 and 4 times')
|
89
|
+
result = parse(prefix + 'between 2 and 4 times')
|
53
90
|
expect(result).to be_success
|
54
91
|
|
55
92
|
# Dropping 'times' keyword is shorter syntax
|
56
|
-
expect(parse('between 2 and 4')).to be_success
|
57
|
-
|
93
|
+
expect(parse(prefix + 'between 2 and 4')).to be_success
|
94
|
+
|
58
95
|
regexp = regexp_repr(result)
|
59
|
-
expect(regexp.to_str).to eq('{2,
|
96
|
+
expect(regexp.to_str).to eq('[p-t]{2,4}')
|
60
97
|
end
|
61
98
|
|
99
|
+
|
62
100
|
it "should parse 'once or more' syntax" do
|
63
|
-
result = parse('once or more')
|
101
|
+
result = parse(prefix + 'once or more')
|
64
102
|
expect(result).to be_success
|
103
|
+
|
104
|
+
regexp = regexp_repr(result)
|
105
|
+
expect(regexp.to_str).to eq('[p-t]+')
|
65
106
|
end
|
66
107
|
|
67
108
|
it "should parse 'never or more' syntax" do
|
68
|
-
result = parse('never or more')
|
109
|
+
result = parse(prefix + 'never or more')
|
69
110
|
expect(result).to be_success
|
111
|
+
|
112
|
+
regexp = regexp_repr(result)
|
113
|
+
expect(regexp.to_str).to eq('[p-t]*')
|
70
114
|
end
|
71
115
|
|
72
116
|
it "should parse 'at least ... times' syntax" do
|
73
|
-
result = parse('at least 10 times')
|
117
|
+
result = parse(prefix + 'at least 10 times')
|
74
118
|
expect(result).to be_success
|
75
|
-
|
119
|
+
|
76
120
|
regexp = regexp_repr(result)
|
77
|
-
expect(regexp.to_str).to eq('{10,}')
|
121
|
+
expect(regexp.to_str).to eq('[p-t]{10,}')
|
78
122
|
end
|
79
|
-
|
80
123
|
end # context
|
81
|
-
|
82
124
|
end # describe
|
83
125
|
|
84
126
|
|
85
|
-
=begin
|
86
|
-
|
87
|
-
unless result.success?
|
88
|
-
# Stop if the parse failed...
|
89
|
-
puts "Parsing of '#{ARGV[0]}' failed"
|
90
|
-
puts "Reason: #{result.failure_reason.message}"
|
91
|
-
exit(1)
|
92
|
-
end
|
93
|
-
|
94
|
-
|
95
|
-
# Generate a concrete syntax parse tree from the parse result
|
96
|
-
cst_ptree = result.parse_tree
|
97
|
-
print_tree('Concrete Syntax Tree (CST)', cst_ptree)
|
98
|
-
|
99
|
-
# Generate an abstract syntax parse tree from the parse result
|
100
|
-
tree_builder = ASTBuilder
|
101
|
-
ast_ptree = result.parse_tree(tree_builder)
|
102
|
-
=end
|
103
|
-
|