rley 0.5.08 → 0.5.09

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
1
+ # File: compound_expression.rb
2
+
3
+ require_relative "expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. An element that is part of a regular expression &
8
+ # that has its own child sub-expressions.
9
+ class CompoundExpression < Expression
10
+
11
+ public
12
+ # Redefined method. Return false since it may have one or more children.
13
+ def atomic?
14
+ return false
15
+ end
16
+
17
+ =begin
18
+ # Build a depth-first in-order children visitor.
19
+ # The visitor is implemented as an Enumerator.
20
+ def df_visitor()
21
+ root = children # The visit will start from the children of this object
22
+
23
+ visitor = Enumerator.new do |result| # result is a Yielder
24
+ # Initialization part: will run once
25
+ visit_stack = [ root ] # The LIFO queue of nodes to visit
26
+
27
+ begin # Traversal part (as a loop)
28
+ top = visit_stack.pop()
29
+ if top.kind_of?(Array)
30
+ if top.empty?
31
+ next
32
+ else
33
+ currChild = top.pop()
34
+ visit_stack.push top
35
+ end
36
+ else
37
+ currChild = top
38
+ end
39
+
40
+ result << currChild # Return the visited child
41
+
42
+ unless currChild.atomic?
43
+ children_to_enqueue = currChild.children.reverse() # in-order traversal implies LIFO queue
44
+ visit_stack.push(children_to_enqueue)
45
+ end
46
+ end until visit_stack.empty?
47
+ end
48
+ end
49
+ =end
50
+
51
+ protected
52
+ # Abstract method. Return the text representation of the child (if any)
53
+ def all_child_text() abstract_method()
54
+ end
55
+
56
+ end # class
57
+
58
+ end # module
59
+
60
+ # End of file
@@ -0,0 +1,42 @@
1
+ # File: expression.rb
2
+
3
+ require_relative 'abstract_method'
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. The generalization of any valid regular (sub)expression.
8
+ class Expression
9
+ def initialize()
10
+ end
11
+
12
+ public
13
+ # Abstract method. Return true iff the expression is atomic (= may not have any child).
14
+ def atomic? abstract_method
15
+ end
16
+
17
+ # Abstract method. Return the number of values that match this expression.
18
+ # [theParentOptions] a Hash of matching options. They are overridden by options with same name
19
+ # that are bound to this object.
20
+ def cardinality(theParentOptions) abstract_method
21
+ end
22
+
23
+ protected
24
+ # Determine the matching options to apply to this object, given the options coming from the parent
25
+ # and options that are local to this object. Local options take precedence.
26
+ # [theParentOptions] a Hash of matching options. They are overridden by options with same name
27
+ # that are bound to this object.
28
+ def options(theParentOptions)
29
+ resulting_options = theParentOptions.merge(@local_options)
30
+ return resulting_options
31
+ end
32
+
33
+ # Abstract conversion method.
34
+ # Purpose: Return the String representation of the expression.
35
+ def to_str() abstract_method
36
+ end
37
+
38
+ end # class
39
+
40
+ end # module
41
+
42
+ # End of file
@@ -0,0 +1,31 @@
1
+ # File: monadic_expression.rb
2
+
3
+ require_relative "compound_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. An element that is part of a regular expression &
8
+ # that can have up to one child sub-expression.
9
+ class MonadicExpression < CompoundExpression
10
+ # The (optional) child sub-expression
11
+ attr_reader(:child)
12
+
13
+ # Constructor.
14
+ def initialize(theChild)
15
+ super()
16
+ @child = theChild
17
+ end
18
+
19
+ protected
20
+ # Return the text representation of the child (if any)
21
+ def all_child_text()
22
+ result = child.nil? ? '' : child.to_str()
23
+
24
+ return result
25
+ end
26
+
27
+ end # class
28
+
29
+ end # module
30
+
31
+ # End of file
@@ -0,0 +1,64 @@
1
+ # File: polyadic_expression.rb
2
+
3
+ require_relative "compound_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. An element that is part of a regular expression &
8
+ # that has its own child sub-expressions.
9
+ class PolyadicExpression < CompoundExpression
10
+ # The aggregation of child elements
11
+ attr_reader(:children)
12
+
13
+ # Constructor.
14
+ def initialize(theChildren)
15
+ super()
16
+ @children = theChildren
17
+ end
18
+
19
+ public
20
+ # Append the given child to the list of children.
21
+ # TODO: assess whether to defer to a subclass NAryExpression
22
+ def <<(aChild)
23
+ @children << aChild
24
+
25
+ return self
26
+ end
27
+
28
+ # Build a depth-first in-order children visitor.
29
+ # The visitor is implemented as an Enumerator.
30
+ def df_visitor()
31
+ root = children # The visit will start from the children of this object
32
+
33
+ visitor = Enumerator.new do |result| # result is a Yielder
34
+ # Initialization part: will run once
35
+ visit_stack = [ root ] # The LIFO queue of nodes to visit
36
+
37
+ begin # Traversal part (as a loop)
38
+ top = visit_stack.pop()
39
+ if top.kind_of?(Array)
40
+ if top.empty?
41
+ next
42
+ else
43
+ currChild = top.pop()
44
+ visit_stack.push top
45
+ end
46
+ else
47
+ currChild = top
48
+ end
49
+
50
+ result << currChild # Return the visited child
51
+
52
+ unless currChild.atomic?
53
+ children_to_enqueue = currChild.children.reverse() # in-order traversal implies LIFO queue
54
+ visit_stack.push(children_to_enqueue)
55
+ end
56
+ end until visit_stack.empty?
57
+ end
58
+ end
59
+
60
+ end # class
61
+
62
+ end # module
63
+
64
+ # End of file
@@ -0,0 +1,28 @@
1
+ # File: quantifiable.rb
2
+
3
+ require_relative 'multiplicity'
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+
8
+ module Quantifiable
9
+
10
+ public
11
+ # Return true iff a quantifier is attached to this object (i.e. @quantifier is not nil).
12
+ def quantified?
13
+ return @quantifier.nil? ? false :true
14
+ end
15
+
16
+ def quantifier
17
+ @quantifier
18
+ end
19
+
20
+ def quantifier=(aQuantifier)
21
+ @quantifier = aQuantifier
22
+ end
23
+
24
+ end # module Quantifiable
25
+
26
+ end # module
27
+
28
+ # End of file
@@ -0,0 +1,31 @@
1
+ # File: repetition.rb
2
+
3
+ require_relative "monadic_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. A unary matching operator.
8
+ # It succeeds when the specified repetition of the child expression succeeds to match
9
+ # the subject text in the same serial arrangement
10
+ class Repetition < MonadicExpression
11
+ attr_reader(:multiplicity)
12
+
13
+ # Constructor.
14
+ def initialize(childExpressionToRepeat, aMultiplicity)
15
+ super(childExpressionToRepeat)
16
+ @multiplicity = aMultiplicity
17
+ end
18
+
19
+ public
20
+ # Conversion method re-definition.
21
+ # Purpose: Return the String representation of the child expression followed by its multiplicity.
22
+ def to_str()
23
+ result = all_child_text() + multiplicity.to_str()
24
+ return result
25
+ end
26
+
27
+ end # class
28
+
29
+ end # module
30
+
31
+ # End of file
@@ -1 +1,5 @@
1
- require_relative './regex/multiplicity'
1
+ require_relative './regex/character'
2
+ require_relative './regex/char_range'
3
+ require_relative './regex/multiplicity'
4
+ require_relative './regex/repetition'
5
+ require_relative './regex/char_class'
@@ -29,14 +29,18 @@ module SRL
29
29
  AT
30
30
  BETWEEN
31
31
  EXACTLY
32
+ FROM
32
33
  LEAST
34
+ LETTER
33
35
  MORE
34
36
  NEVER
35
37
  ONCE
36
38
  OPTIONAL
37
39
  OR
38
40
  TIMES
41
+ TO
39
42
  TWICE
43
+ UPPERCASE
40
44
  ].map { |x| [x, x] } .to_h
41
45
 
42
46
  class ScanError < StandardError; end
@@ -67,18 +71,17 @@ module SRL
67
71
  token = nil
68
72
 
69
73
  if '(),'.include? curr_ch
70
- # Single character token
74
+ # Delimiters, separators => single character token
71
75
  token = build_token(@@lexeme2name[curr_ch], scanner.getch)
72
76
  elsif (lexeme = scanner.scan(/[0-9]{2,}/))
73
77
  token = build_token('INTEGER', lexeme) # An integer has two or more digits
74
78
  elsif (lexeme = scanner.scan(/[0-9]/))
75
- token = build_token('DIGIT', lexeme)
79
+ token = build_token('DIGIT_LIT', lexeme)
76
80
  elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
77
81
  token = build_token(@@keywords[lexeme.upcase], lexeme)
78
82
  # TODO: handle case unknown identifier
79
- elsif (lexeme = scanner.scan(/\w/))
80
- puts 'Buff'
81
- token = build_token('CHAR', lexeme)
83
+ elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
84
+ token = build_token('LETTER_LIT', lexeme)
82
85
  else # Unknown token
83
86
  erroneous = curr_ch.nil? ? '' : curr_ch
84
87
  sequel = scanner.scan(/.{1,20}/)
@@ -15,89 +15,112 @@ describe 'Integration tests:' do
15
15
  regexp = tree.root
16
16
  end
17
17
 
18
+ context 'Parsing character ranges:' do
19
+ it "should parse 'letter from ... to ...' syntax" do
20
+ result = parse('letter from a to f')
21
+ expect(result).to be_success
22
+
23
+ regexp = regexp_repr(result)
24
+ expect(regexp.to_str).to eq('[a-f]')
25
+ end
26
+
27
+ it "should parse 'uppercase letter from ... to ...' syntax" do
28
+ result = parse('UPPERCASE letter from A to F')
29
+ expect(result).to be_success
30
+
31
+ regexp = regexp_repr(result)
32
+ expect(regexp.to_str).to eq('[A-F]')
33
+ end
34
+
35
+ it "should parse 'letter' syntax" do
36
+ result = parse('letter')
37
+ expect(result).to be_success
38
+
39
+ regexp = regexp_repr(result)
40
+ expect(regexp.to_str).to eq('[a-z]')
41
+ end
42
+
43
+ it "should parse 'uppercase letter' syntax" do
44
+ result = parse('uppercase letter')
45
+ expect(result).to be_success
46
+
47
+ regexp = regexp_repr(result)
48
+ expect(regexp.to_str).to eq('[A-Z]')
49
+ end
50
+
51
+ end # context
52
+
18
53
  context 'Parsing quantifiers:' do
54
+ let(:prefix) { 'letter from p to t ' }
55
+
19
56
  it "should parse 'once' syntax" do
20
- result = parse('once')
57
+ result = parse(prefix + 'once')
21
58
  expect(result).to be_success
22
59
 
23
60
  regexp = regexp_repr(result)
24
- expect(regexp.to_str).to eq('{1}')
61
+ expect(regexp.to_str).to eq('[p-t]{1}')
25
62
  end
26
63
 
27
64
  it "should parse 'twice' syntax" do
28
- result = parse('twice')
65
+ result = parse(prefix + 'twice')
29
66
  expect(result).to be_success
30
-
67
+
31
68
  regexp = regexp_repr(result)
32
- expect(regexp.to_str).to eq('{2}')
69
+ expect(regexp.to_str).to eq('[p-t]{2}')
33
70
  end
34
71
 
35
72
  it "should parse 'optional' syntax" do
36
- result = parse('optional')
73
+ result = parse(prefix + 'optional')
37
74
  expect(result).to be_success
38
-
75
+
39
76
  regexp = regexp_repr(result)
40
- expect(regexp.to_str).to eq('?')
77
+ expect(regexp.to_str).to eq('[p-t]?')
41
78
  end
42
79
 
43
80
  it "should parse 'exactly ... times' syntax" do
44
- result = parse('exactly 4 times')
81
+ result = parse('letter from a to f exactly 4 times')
45
82
  expect(result).to be_success
46
-
83
+
47
84
  regexp = regexp_repr(result)
48
- expect(regexp.to_str).to eq('{4}')
85
+ expect(regexp.to_str).to eq('[a-f]{4}')
49
86
  end
50
87
 
51
88
  it "should parse 'between ... and ... times' syntax" do
52
- result = parse('between 2 and 4 times')
89
+ result = parse(prefix + 'between 2 and 4 times')
53
90
  expect(result).to be_success
54
91
 
55
92
  # Dropping 'times' keyword is shorter syntax
56
- expect(parse('between 2 and 4')).to be_success
57
-
93
+ expect(parse(prefix + 'between 2 and 4')).to be_success
94
+
58
95
  regexp = regexp_repr(result)
59
- expect(regexp.to_str).to eq('{2, 4}')
96
+ expect(regexp.to_str).to eq('[p-t]{2,4}')
60
97
  end
61
98
 
99
+
62
100
  it "should parse 'once or more' syntax" do
63
- result = parse('once or more')
101
+ result = parse(prefix + 'once or more')
64
102
  expect(result).to be_success
103
+
104
+ regexp = regexp_repr(result)
105
+ expect(regexp.to_str).to eq('[p-t]+')
65
106
  end
66
107
 
67
108
  it "should parse 'never or more' syntax" do
68
- result = parse('never or more')
109
+ result = parse(prefix + 'never or more')
69
110
  expect(result).to be_success
111
+
112
+ regexp = regexp_repr(result)
113
+ expect(regexp.to_str).to eq('[p-t]*')
70
114
  end
71
115
 
72
116
  it "should parse 'at least ... times' syntax" do
73
- result = parse('at least 10 times')
117
+ result = parse(prefix + 'at least 10 times')
74
118
  expect(result).to be_success
75
-
119
+
76
120
  regexp = regexp_repr(result)
77
- expect(regexp.to_str).to eq('{10,}')
121
+ expect(regexp.to_str).to eq('[p-t]{10,}')
78
122
  end
79
-
80
123
  end # context
81
-
82
124
  end # describe
83
125
 
84
126
 
85
- =begin
86
-
87
- unless result.success?
88
- # Stop if the parse failed...
89
- puts "Parsing of '#{ARGV[0]}' failed"
90
- puts "Reason: #{result.failure_reason.message}"
91
- exit(1)
92
- end
93
-
94
-
95
- # Generate a concrete syntax parse tree from the parse result
96
- cst_ptree = result.parse_tree
97
- print_tree('Concrete Syntax Tree (CST)', cst_ptree)
98
-
99
- # Generate an abstract syntax parse tree from the parse result
100
- tree_builder = ASTBuilder
101
- ast_ptree = result.parse_tree(tree_builder)
102
- =end
103
-