rley 0.5.08 → 0.5.09

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,60 @@
1
+ # File: compound_expression.rb
2
+
3
+ require_relative "expression" # Access the superclass
4
+
5
module Regex # This module is used as a namespace
  # Abstract class. A regular expression element that owns child
  # sub-expressions. Subclasses decide how the children are stored and
  # must supply #all_child_text.
  class CompoundExpression < Expression
    # Redefined method. A compound expression may have one or more
    # children, hence it is never atomic.
    def atomic?
      false
    end

=begin
    # Build a depth-first in-order children visitor.
    # The visitor is implemented as an Enumerator.
    def df_visitor()
      root = children # The visit will start from the children of this object

      visitor = Enumerator.new do |result| # result is a Yielder
        # Initialization part: will run once
        visit_stack = [ root ] # The LIFO queue of nodes to visit

        begin # Traversal part (as a loop)
          top = visit_stack.pop()
          if top.kind_of?(Array)
            if top.empty?
              next
            else
              currChild = top.pop()
              visit_stack.push top
            end
          else
            currChild = top
          end

          result << currChild # Return the visited child

          unless currChild.atomic?
            children_to_enqueue = currChild.children.reverse() # in-order traversal implies LIFO queue
            visit_stack.push(children_to_enqueue)
          end
        end until visit_stack.empty?
      end
    end
=end

    protected

    # Abstract method. Return the text representation of the child (if any).
    # The body only invokes the abstract-method helper; subclasses are
    # expected to redefine this method.
    def all_child_text
      abstract_method()
    end
  end # class
end # module
59
+
60
+ # End of file
@@ -0,0 +1,42 @@
1
+ # File: expression.rb
2
+
3
+ require_relative 'abstract_method'
4
+
5
module Regex # This module is used as a namespace
  # Abstract class. The generalization of any valid regular (sub)expression.
  # The abstract methods below delegate to `abstract_method`, a helper that
  # is not defined in this class (presumably supplied by the
  # 'abstract_method' file required above -- confirm).
  class Expression
    # Constructor. Takes no argument and initializes no state.
    def initialize
    end

    # Abstract method. Return true iff the expression is atomic
    # (= may not have any child).
    # NOTE: the body must sit on its own line. Written as
    # `def atomic? abstract_method`, Ruby parses `abstract_method` as a
    # required parameter and the method body is empty.
    def atomic?
      abstract_method
    end

    # Abstract method. Return the number of values that match this expression.
    # [theParentOptions] a Hash of matching options. They are overridden by
    # options with the same name that are bound to this object.
    def cardinality(theParentOptions)
      abstract_method
    end

    protected

    # Determine the matching options to apply to this object, given the
    # options coming from the parent and the options that are local to this
    # object. Local options take precedence.
    # [theParentOptions] a Hash of matching options. They are overridden by
    # options with the same name that are bound to this object.
    # Returns a new Hash; theParentOptions is left untouched.
    def options(theParentOptions)
      # @local_options is not assigned anywhere in this class: treat a
      # missing value as "no local options" instead of letting
      # Hash#merge(nil) raise a TypeError.
      theParentOptions.merge(@local_options || {})
    end

    # Abstract conversion method.
    # Purpose: Return the String representation of the expression.
    def to_str
      abstract_method
    end
  end # class
end # module
41
+
42
+ # End of file
@@ -0,0 +1,31 @@
1
+ # File: monadic_expression.rb
2
+
3
+ require_relative "compound_expression" # Access the superclass
4
+
5
module Regex # This module is used as a namespace
  # Abstract class. A regular expression element that can have
  # at most one child sub-expression.
  class MonadicExpression < CompoundExpression
    # The (optional) child sub-expression; nil when there is none
    attr_reader(:child)

    # Constructor.
    # [theChild] the child sub-expression, stored as-is (may be nil).
    def initialize(theChild)
      super()
      @child = theChild
    end

    protected

    # Return the text representation of the child:
    # an empty String when there is no child, otherwise child.to_str.
    def all_child_text
      return '' if child.nil?

      child.to_str
    end
  end # class
end # module
30
+
31
+ # End of file
@@ -0,0 +1,64 @@
1
+ # File: polyadic_expression.rb
2
+
3
+ require_relative "compound_expression" # Access the superclass
4
+
5
module Regex # This module is used as a namespace
  # Abstract class. An element that is part of a regular expression &
  # that has its own child sub-expressions.
  class PolyadicExpression < CompoundExpression
    # The aggregation of child elements
    attr_reader(:children)

    # Constructor.
    # [theChildren] the collection of child sub-expressions. It is stored
    # as-is (no copy is made).
    def initialize(theChildren)
      super()
      @children = theChildren
    end

    # Append the given child to the list of children.
    # Returns self, so that calls can be chained.
    # TODO: assess whether to defer to a subclass NAryExpression
    def <<(aChild)
      @children << aChild

      self
    end

    # Build a depth-first children visitor.
    # The visitor is implemented as an Enumerator that yields every
    # descendant: each node is yielded before its own sub-expressions, and
    # siblings are yielded from first to last.
    # The traversal works on private copies of the child lists, so it never
    # alters the receiver and the Enumerator can be run more than once.
    def df_visitor
      root = children # The visit will start from the children of this object

      Enumerator.new do |result| # result is a Yielder
        # Each stack entry is an Array of siblings still to visit. It is a
        # reversed copy, so that Array#pop hands them out first to last.
        visit_stack = [root.reverse]

        until visit_stack.empty?
          siblings = visit_stack.last
          if siblings.empty?
            visit_stack.pop # This level is exhausted
            next
          end

          curr_child = siblings.pop
          result << curr_child # Return the visited child
          next if curr_child.atomic?

          # Descend: the sub-expressions are visited before the remaining siblings
          visit_stack.push(sub_expressions_of(curr_child).reverse)
        end
      end
    end

    private

    # Return the direct sub-expressions of the given non-atomic node as an Array.
    # A node exposing only a single `child` accessor (e.g. MonadicExpression)
    # gives a list of zero or one element; any other node is asked for its
    # `children`.
    def sub_expressions_of(anExpression)
      single_child = !anExpression.respond_to?(:children) && anExpression.respond_to?(:child)
      single_child ? [anExpression.child].compact : anExpression.children
    end
  end # class
end # module
63
+
64
+ # End of file
@@ -0,0 +1,28 @@
1
+ # File: quantifiable.rb
2
+
3
+ require_relative 'multiplicity'
4
+
5
module Regex # This module is used as a namespace
  # Mix-in module. Gives the including object an optional quantifier,
  # stored in the @quantifier instance variable.
  module Quantifiable
    # Reader and writer for the quantifier bound to this object.
    # The reader returns nil as long as no quantifier was assigned.
    attr_accessor(:quantifier)

    # Return true iff a quantifier is bound to this object
    # (that is, the quantifier isn't nil).
    def quantified?
      !@quantifier.nil?
    end
  end # module Quantifiable
end # module
27
+
28
+ # End of file
@@ -0,0 +1,31 @@
1
+ # File: repetition.rb
2
+
3
+ require_relative "monadic_expression" # Access the superclass
4
+
5
module Regex # This module is used as a namespace
  # A unary matching operator.
  # It succeeds when the specified repetition of the child expression
  # succeeds to match the subject text in the same serial arrangement.
  class Repetition < MonadicExpression
    # The multiplicity (repetition count constraint) applied to the child
    attr_reader(:multiplicity)

    # Constructor.
    # [childExpressionToRepeat] the sub-expression that is repeated.
    # [aMultiplicity] the multiplicity object; it must respond to to_str.
    def initialize(childExpressionToRepeat, aMultiplicity)
      super(childExpressionToRepeat)
      @multiplicity = aMultiplicity
    end

    # Conversion method re-definition.
    # Purpose: Return the String representation of the repetition, i.e. the
    # text of the child (if any) followed by the text of the multiplicity.
    def to_str
      all_child_text + multiplicity.to_str
    end
  end # class
end # module
30
+
31
+ # End of file
@@ -1 +1,5 @@
1
- require_relative './regex/multiplicity'
1
+ require_relative './regex/character'
2
+ require_relative './regex/char_range'
3
+ require_relative './regex/multiplicity'
4
+ require_relative './regex/repetition'
5
+ require_relative './regex/char_class'
@@ -29,14 +29,18 @@ module SRL
29
29
  AT
30
30
  BETWEEN
31
31
  EXACTLY
32
+ FROM
32
33
  LEAST
34
+ LETTER
33
35
  MORE
34
36
  NEVER
35
37
  ONCE
36
38
  OPTIONAL
37
39
  OR
38
40
  TIMES
41
+ TO
39
42
  TWICE
43
+ UPPERCASE
40
44
  ].map { |x| [x, x] } .to_h
41
45
 
42
46
  class ScanError < StandardError; end
@@ -67,18 +71,17 @@ module SRL
67
71
  token = nil
68
72
 
69
73
  if '(),'.include? curr_ch
70
- # Single character token
74
+ # Delimiters, separators => single character token
71
75
  token = build_token(@@lexeme2name[curr_ch], scanner.getch)
72
76
  elsif (lexeme = scanner.scan(/[0-9]{2,}/))
73
77
  token = build_token('INTEGER', lexeme) # An integer has two or more digits
74
78
  elsif (lexeme = scanner.scan(/[0-9]/))
75
- token = build_token('DIGIT', lexeme)
79
+ token = build_token('DIGIT_LIT', lexeme)
76
80
  elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
77
81
  token = build_token(@@keywords[lexeme.upcase], lexeme)
78
82
  # TODO: handle case unknown identifier
79
- elsif (lexeme = scanner.scan(/\w/))
80
- puts 'Buff'
81
- token = build_token('CHAR', lexeme)
83
+ elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
84
+ token = build_token('LETTER_LIT', lexeme)
82
85
  else # Unknown token
83
86
  erroneous = curr_ch.nil? ? '' : curr_ch
84
87
  sequel = scanner.scan(/.{1,20}/)
@@ -15,89 +15,112 @@ describe 'Integration tests:' do
15
15
  regexp = tree.root
16
16
  end
17
17
 
18
+ context 'Parsing character ranges:' do
19
+ it "should parse 'letter from ... to ...' syntax" do
20
+ result = parse('letter from a to f')
21
+ expect(result).to be_success
22
+
23
+ regexp = regexp_repr(result)
24
+ expect(regexp.to_str).to eq('[a-f]')
25
+ end
26
+
27
+ it "should parse 'uppercase letter from ... to ...' syntax" do
28
+ result = parse('UPPERCASE letter from A to F')
29
+ expect(result).to be_success
30
+
31
+ regexp = regexp_repr(result)
32
+ expect(regexp.to_str).to eq('[A-F]')
33
+ end
34
+
35
+ it "should parse 'letter' syntax" do
36
+ result = parse('letter')
37
+ expect(result).to be_success
38
+
39
+ regexp = regexp_repr(result)
40
+ expect(regexp.to_str).to eq('[a-z]')
41
+ end
42
+
43
+ it "should parse 'uppercase letter' syntax" do
44
+ result = parse('uppercase letter')
45
+ expect(result).to be_success
46
+
47
+ regexp = regexp_repr(result)
48
+ expect(regexp.to_str).to eq('[A-Z]')
49
+ end
50
+
51
+ end # context
52
+
18
53
  context 'Parsing quantifiers:' do
54
+ let(:prefix) { 'letter from p to t ' }
55
+
19
56
  it "should parse 'once' syntax" do
20
- result = parse('once')
57
+ result = parse(prefix + 'once')
21
58
  expect(result).to be_success
22
59
 
23
60
  regexp = regexp_repr(result)
24
- expect(regexp.to_str).to eq('{1}')
61
+ expect(regexp.to_str).to eq('[p-t]{1}')
25
62
  end
26
63
 
27
64
  it "should parse 'twice' syntax" do
28
- result = parse('twice')
65
+ result = parse(prefix + 'twice')
29
66
  expect(result).to be_success
30
-
67
+
31
68
  regexp = regexp_repr(result)
32
- expect(regexp.to_str).to eq('{2}')
69
+ expect(regexp.to_str).to eq('[p-t]{2}')
33
70
  end
34
71
 
35
72
  it "should parse 'optional' syntax" do
36
- result = parse('optional')
73
+ result = parse(prefix + 'optional')
37
74
  expect(result).to be_success
38
-
75
+
39
76
  regexp = regexp_repr(result)
40
- expect(regexp.to_str).to eq('?')
77
+ expect(regexp.to_str).to eq('[p-t]?')
41
78
  end
42
79
 
43
80
  it "should parse 'exactly ... times' syntax" do
44
- result = parse('exactly 4 times')
81
+ result = parse('letter from a to f exactly 4 times')
45
82
  expect(result).to be_success
46
-
83
+
47
84
  regexp = regexp_repr(result)
48
- expect(regexp.to_str).to eq('{4}')
85
+ expect(regexp.to_str).to eq('[a-f]{4}')
49
86
  end
50
87
 
51
88
  it "should parse 'between ... and ... times' syntax" do
52
- result = parse('between 2 and 4 times')
89
+ result = parse(prefix + 'between 2 and 4 times')
53
90
  expect(result).to be_success
54
91
 
55
92
  # Dropping 'times' keyword is shorter syntax
56
- expect(parse('between 2 and 4')).to be_success
57
-
93
+ expect(parse(prefix + 'between 2 and 4')).to be_success
94
+
58
95
  regexp = regexp_repr(result)
59
- expect(regexp.to_str).to eq('{2, 4}')
96
+ expect(regexp.to_str).to eq('[p-t]{2,4}')
60
97
  end
61
98
 
99
+
62
100
  it "should parse 'once or more' syntax" do
63
- result = parse('once or more')
101
+ result = parse(prefix + 'once or more')
64
102
  expect(result).to be_success
103
+
104
+ regexp = regexp_repr(result)
105
+ expect(regexp.to_str).to eq('[p-t]+')
65
106
  end
66
107
 
67
108
  it "should parse 'never or more' syntax" do
68
- result = parse('never or more')
109
+ result = parse(prefix + 'never or more')
69
110
  expect(result).to be_success
111
+
112
+ regexp = regexp_repr(result)
113
+ expect(regexp.to_str).to eq('[p-t]*')
70
114
  end
71
115
 
72
116
  it "should parse 'at least ... times' syntax" do
73
- result = parse('at least 10 times')
117
+ result = parse(prefix + 'at least 10 times')
74
118
  expect(result).to be_success
75
-
119
+
76
120
  regexp = regexp_repr(result)
77
- expect(regexp.to_str).to eq('{10,}')
121
+ expect(regexp.to_str).to eq('[p-t]{10,}')
78
122
  end
79
-
80
123
  end # context
81
-
82
124
  end # describe
83
125
 
84
126
 
85
- =begin
86
-
87
- unless result.success?
88
- # Stop if the parse failed...
89
- puts "Parsing of '#{ARGV[0]}' failed"
90
- puts "Reason: #{result.failure_reason.message}"
91
- exit(1)
92
- end
93
-
94
-
95
- # Generate a concrete syntax parse tree from the parse result
96
- cst_ptree = result.parse_tree
97
- print_tree('Concrete Syntax Tree (CST)', cst_ptree)
98
-
99
- # Generate an abstract syntax parse tree from the parse result
100
- tree_builder = ASTBuilder
101
- ast_ptree = result.parse_tree(tree_builder)
102
- =end
103
-