rley 0.5.10 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -1
- data/appveyor.yml +6 -5
- data/examples/NLP/engtagger.rb +176 -0
- data/examples/general/SRL/lib/ast_builder.rb +217 -21
- data/examples/general/SRL/lib/grammar.rb +33 -5
- data/examples/general/SRL/lib/regex/alternation.rb +30 -0
- data/examples/general/SRL/lib/regex/char_class.rb +28 -22
- data/examples/general/SRL/lib/regex/char_shorthand.rb +50 -0
- data/examples/general/SRL/lib/regex/character.rb +5 -3
- data/examples/general/SRL/lib/regex/concatenation.rb +32 -0
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +29 -0
- data/examples/general/SRL/lib/regex/wildcard.rb +26 -0
- data/examples/general/SRL/lib/regex_repr.rb +5 -0
- data/examples/general/SRL/lib/tokenizer.rb +28 -3
- data/examples/general/SRL/spec/integration_spec.rb +151 -8
- data/examples/general/SRL/spec/tokenizer_spec.rb +12 -0
- data/examples/general/left.rb +36 -0
- data/examples/general/right.rb +36 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/edge.rb +12 -1
- data/lib/rley/gfg/grm_flow_graph.rb +21 -1
- data/lib/rley/gfg/item_vertex.rb +1 -1
- data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
- data/lib/rley/gfg/start_vertex.rb +1 -0
- data/lib/rley/gfg/vertex.rb +27 -0
- data/lib/rley/lexical/token.rb +1 -0
- data/lib/rley/parser/error_reason.rb +2 -1
- data/lib/rley/parser/gfg_chart.rb +14 -0
- data/lib/rley/parser/gfg_earley_parser.rb +0 -1
- data/lib/rley/parser/gfg_parsing.rb +4 -3
- data/lib/rley/parser/parse_entry.rb +33 -3
- data/lib/rley/parser/parse_entry_set.rb +14 -2
- data/lib/rley/parser/parse_tree_builder.rb +1 -1
- data/lib/rley/parser/parse_walker_factory.rb +0 -1
- data/lib/rley/syntax/grm_symbol.rb +2 -0
- data/lib/rley/syntax/production.rb +15 -3
- data/lib/rley/syntax/symbol_seq.rb +16 -1
- data/spec/rley/gfg/end_vertex_spec.rb +9 -1
- data/spec/rley/gfg/grm_flow_graph_spec.rb +9 -0
- data/spec/rley/gfg/item_vertex_spec.rb +9 -0
- data/spec/rley/gfg/start_vertex_spec.rb +9 -1
- data/spec/rley/parser/gfg_parsing_spec.rb +0 -1
- data/spec/rley/parser/parse_entry_set_spec.rb +15 -0
- data/spec/rley/parser/parse_entry_spec.rb +24 -13
- data/spec/rley/parser/parse_tracer_spec.rb +1 -1
- data/spec/rley/syntax/production_spec.rb +10 -0
- data/spec/rley/syntax/symbol_seq_spec.rb +5 -0
- metadata +10 -2
@@ -6,25 +6,53 @@ module SRL
|
|
6
6
|
# This is a very partial grammar of SRL.
|
7
7
|
# It will be expanded with the coming versions of Rley
|
8
8
|
builder = Rley::Syntax::GrammarBuilder.new do
|
9
|
+
add_terminals('LPAREN', 'RPAREN', 'COMMA')
|
9
10
|
add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
|
11
|
+
add_terminals('LITERALLY', 'STRING_LIT')
|
10
12
|
add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
|
11
|
-
add_terminals('DIGIT', 'NUMBER')
|
13
|
+
add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
|
14
|
+
add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
|
15
|
+
add_terminals('TAB', 'BACKSLASH', 'NEW', 'LINE')
|
16
|
+
add_terminals('OF', 'ONE')
|
12
17
|
add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
|
13
18
|
add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
|
14
19
|
add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
|
15
20
|
|
16
|
-
|
17
|
-
rule '
|
21
|
+
rule 'srl' => 'pattern'
|
22
|
+
rule 'pattern' => %w[pattern COMMA quantifiable]
|
23
|
+
rule 'pattern' => %w[pattern quantifiable]
|
24
|
+
rule 'pattern' => 'quantifiable'
|
25
|
+
rule 'quantifiable' => 'term'
|
26
|
+
rule 'quantifiable' => %w[term quantifier]
|
18
27
|
rule 'term' => 'atom'
|
19
|
-
rule 'term' =>
|
28
|
+
rule 'term' => 'alternation'
|
29
|
+
rule 'term' => 'grouping'
|
20
30
|
rule 'atom' => 'letter_range'
|
21
31
|
rule 'atom' => 'digit_range'
|
32
|
+
rule 'atom' => 'character_class'
|
33
|
+
rule 'atom' => 'special_char'
|
34
|
+
rule 'atom' => 'literal'
|
22
35
|
rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
|
23
36
|
rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
|
24
37
|
rule 'letter_range' => 'LETTER'
|
25
38
|
rule 'letter_range' => %w[UPPERCASE LETTER]
|
26
39
|
rule 'digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]
|
27
|
-
rule 'digit_range' => 'digit_or_number'
|
40
|
+
rule 'digit_range' => 'digit_or_number'
|
41
|
+
rule 'character_class' => %w[ANY CHARACTER]
|
42
|
+
rule 'character_class' => %w[NO CHARACTER]
|
43
|
+
rule 'character_class' => 'WHITESPACE'
|
44
|
+
rule 'character_class' => %w[NO WHITESPACE]
|
45
|
+
rule 'character_class' => 'ANYTHING'
|
46
|
+
rule 'character_class' => %w[ONE OF STRING_LIT]
|
47
|
+
rule 'special_char' => 'TAB'
|
48
|
+
rule 'special_char' => 'BACKSLASH'
|
49
|
+
rule 'special_char' => %w[NEW LINE]
|
50
|
+
rule 'literal' => %w[LITERALLY STRING_LIT]
|
51
|
+
rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
|
52
|
+
rule 'alternatives' => %w[alternatives COMMA quantifiable]
|
53
|
+
rule 'alternatives' => %w[alternatives quantifiable]
|
54
|
+
rule 'alternatives' => 'quantifiable'
|
55
|
+
rule 'grouping' => %w[LPAREN pattern RPAREN]
|
28
56
|
rule 'quantifier' => 'ONCE'
|
29
57
|
rule 'quantifier' => 'TWICE'
|
30
58
|
rule 'quantifier' => %w[EXACTLY count TIMES]
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# File: alternation.rb
|
2
|
+
|
3
|
+
require_relative 'polyadic_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. An n-ary matching operator.
|
8
|
+
# It succeeds when one child expression succeeds to match the subject text
|
9
|
+
class Alternation < PolyadicExpression
|
10
|
+
|
11
|
+
# Constructor.
|
12
|
+
def initialize(*theChildren)
|
13
|
+
super(theChildren)
|
14
|
+
end
|
15
|
+
|
16
|
+
public
|
17
|
+
# Conversion method re-definition.
|
18
|
+
# Purpose: Return the String representation of the alternation (children joined with '|').
|
19
|
+
def to_str()
|
20
|
+
result_children = children.map { |aChild| aChild.to_str() }
|
21
|
+
result = '(?:' + result_children.join('|') + ')'
|
22
|
+
|
23
|
+
return result
|
24
|
+
end
|
25
|
+
|
26
|
+
end # class
|
27
|
+
|
28
|
+
end # module
|
29
|
+
|
30
|
+
# End of file
|
@@ -4,30 +4,36 @@ require_relative "polyadic_expression" # Access the superclass
|
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
6
|
|
7
|
-
# Abstract class. A n-ary matching operator.
|
8
|
-
# It succeeds when one child expression succeeds to match the subject text
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
7
|
+
# Abstract class. An n-ary matching operator.
|
8
|
+
# It succeeds when one child expression succeeds to match the subject text.
|
9
|
+
class CharClass < PolyadicExpression
|
10
|
+
# These are characters with special meaning in character classes
|
11
|
+
Metachars = ']\^-'.codepoints
|
12
|
+
# A flag that indicates whether the character class is negated
|
13
|
+
attr_reader(:negated)
|
14
|
+
|
15
|
+
# Constructor.
|
16
|
+
def initialize(to_negate,*theChildren)
|
17
|
+
super(theChildren)
|
18
|
+
@negated = to_negate
|
19
|
+
end
|
19
20
|
|
20
|
-
public
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
21
|
+
public
|
22
|
+
# Conversion method re-definition.
|
23
|
+
# Purpose: Return the String representation of the character class.
|
24
|
+
def to_str()
|
25
|
+
result_children = children.inject('') do |subResult, aChild|
|
26
|
+
if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
|
27
|
+
subResult << "\\" # Escape meta-character...
|
28
|
+
end
|
29
|
+
subResult << aChild.to_str()
|
30
|
+
end
|
31
|
+
result = '['+ (negated ? '^' : '') + result_children + ']'
|
32
|
+
|
33
|
+
return result
|
34
|
+
end
|
29
35
|
|
30
|
-
end # class
|
36
|
+
end # class
|
31
37
|
|
32
38
|
end # module
|
33
39
|
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# File: char_shorthand.rb
|
2
|
+
|
3
|
+
require_relative "atomic_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# A pre-defined character class is in essence a name for a built-in, standard character class.
|
8
|
+
class CharShorthand < AtomicExpression
|
9
|
+
# A constant Hash that defines all the predefined character shorthands.
|
10
|
+
# It contains pairs of the form:
|
11
|
+
# a pre-defined character shorthand letter => the equivalent character class (as a String)
|
12
|
+
StandardCClasses = {
|
13
|
+
'd' => '[0-9]',
|
14
|
+
'D' => '[^0-9]',
|
15
|
+
'h' => '[0-9a-fA-F]',
|
16
|
+
'H' => '[^0-9a-fA-F]',
|
17
|
+
's' => '[ \t\r\n\f]',
|
18
|
+
'S' => '[^ \t\r\n\f]',
|
19
|
+
'w' => '[0-9a-zA-Z_]',
|
20
|
+
'W' => '[^0-9a-zA-Z_]'
|
21
|
+
}
|
22
|
+
|
23
|
+
# A one-letter abbreviation
|
24
|
+
attr_reader(:shortname)
|
25
|
+
|
26
|
+
# Constructor
|
27
|
+
def initialize(aShortname)
|
28
|
+
@shortname = valid_shortname(aShortname)
|
29
|
+
end
|
30
|
+
|
31
|
+
public
|
32
|
+
# Conversion method re-definition.
|
33
|
+
# Purpose: Return the String representation of the expression.
|
34
|
+
def to_str()
|
35
|
+
return "\\#{shortname}"
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
# Return the validated short name.
|
40
|
+
def valid_shortname(aShortname)
|
41
|
+
raise StandardError, "Unknown predefined character class \\#{aShortname}" unless StandardCClasses.include? aShortname
|
42
|
+
|
43
|
+
return aShortname
|
44
|
+
end
|
45
|
+
|
46
|
+
end # class
|
47
|
+
|
48
|
+
end # module
|
49
|
+
|
50
|
+
# End of file
|
@@ -25,6 +25,8 @@ class Character < AtomicExpression
|
|
25
25
|
"\\6" => 6,
|
26
26
|
"\\7" => 7
|
27
27
|
}
|
28
|
+
|
29
|
+
MetaChars = '\^$+?.'
|
28
30
|
|
29
31
|
# The integer value that uniquely identifies the character.
|
30
32
|
attr_reader(:codepoint)
|
@@ -63,7 +65,7 @@ class Character < AtomicExpression
|
|
63
65
|
end
|
64
66
|
@lexeme = aValue
|
65
67
|
|
66
|
-
when
|
68
|
+
when Integer
|
67
69
|
@codepoint = aValue
|
68
70
|
else
|
69
71
|
raise StandardError, "Cannot initialize a Character with a '#{aValue}'."
|
@@ -125,14 +127,14 @@ public
|
|
125
127
|
# newOne == newOne # true. Identity
|
126
128
|
# newOne == Character.new(?\u03a3) # true. Both have same codepoint
|
127
129
|
# newOne == ?\u03a3 # true. The single character String match exactly the char attribute.
|
128
|
-
# newOne == 0x03a3 # true. The
|
130
|
+
# newOne == 0x03a3 # true. The Integer is compared to the codepoint value.
|
129
131
|
# Will test equality with any Object that knows the to_s method
|
130
132
|
def ==(another)
|
131
133
|
result = case another
|
132
134
|
when Character
|
133
135
|
self.to_str == another.to_str
|
134
136
|
|
135
|
-
when
|
137
|
+
when Integer
|
136
138
|
self.codepoint == another
|
137
139
|
|
138
140
|
when String
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# File: concatenation.rb
|
2
|
+
|
3
|
+
require_relative 'polyadic_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# Abstract class. An n-ary matching operator.
|
8
|
+
# It succeeds when each child succeeds to match the subject text in the same
|
9
|
+
# serial arrangement as defined by this concatenation.
|
10
|
+
class Concatenation < PolyadicExpression
|
11
|
+
|
12
|
+
# Constructor.
|
13
|
+
def initialize(*theChildren)
|
14
|
+
super(theChildren)
|
15
|
+
end
|
16
|
+
|
17
|
+
public
|
18
|
+
# Conversion method re-definition.
|
19
|
+
# Purpose: Return the String representation of the concatenated expressions.
|
20
|
+
def to_str()
|
21
|
+
result = children.inject('') { |result, aChild|
|
22
|
+
result << aChild.to_str()
|
23
|
+
}
|
24
|
+
|
25
|
+
return result
|
26
|
+
end
|
27
|
+
|
28
|
+
end # class
|
29
|
+
|
30
|
+
end # module
|
31
|
+
|
32
|
+
# End of file
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# File: non_capturing_group.rb
|
2
|
+
|
3
|
+
require_relative "monadic_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# A non-capturing group; in other words, it is a pure grouping of sub-expressions
|
8
|
+
class NonCapturingGroup < MonadicExpression
|
9
|
+
|
10
|
+
# Constructor.
|
11
|
+
# [aChildExpression] A sub-expression to match. When successful
|
12
|
+
# the matching text is grouped but not assigned to any capture variable.
|
13
|
+
def initialize(aChildExpression)
|
14
|
+
super(aChildExpression)
|
15
|
+
end
|
16
|
+
|
17
|
+
public
|
18
|
+
# Conversion method re-definition.
|
19
|
+
# Purpose: Return the String representation of the non-capturing group.
|
20
|
+
def to_str()
|
21
|
+
result = '(?:' + all_child_text() + ")"
|
22
|
+
return result
|
23
|
+
end
|
24
|
+
|
25
|
+
end # class
|
26
|
+
|
27
|
+
end # module
|
28
|
+
|
29
|
+
# End of file
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# File: wildcard.rb
|
2
|
+
|
3
|
+
require_relative 'atomic_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# A wildcard matches any character (except for the newline).
|
8
|
+
class Wildcard < AtomicExpression
|
9
|
+
|
10
|
+
# Constructor
|
11
|
+
def initialize()
|
12
|
+
super
|
13
|
+
end
|
14
|
+
|
15
|
+
public
|
16
|
+
# Conversion method re-definition.
|
17
|
+
# Purpose: Return the String representation of the expression.
|
18
|
+
def to_str()
|
19
|
+
return '.'
|
20
|
+
end
|
21
|
+
|
22
|
+
end # class
|
23
|
+
|
24
|
+
end # module
|
25
|
+
|
26
|
+
# End of file
|
@@ -1,5 +1,10 @@
|
|
1
1
|
require_relative './regex/character'
|
2
2
|
require_relative './regex/char_range'
|
3
|
+
require_relative './regex/concatenation'
|
3
4
|
require_relative './regex/multiplicity'
|
4
5
|
require_relative './regex/repetition'
|
5
6
|
require_relative './regex/char_class'
|
7
|
+
require_relative './regex/char_shorthand'
|
8
|
+
require_relative './regex/wildcard'
|
9
|
+
require_relative './regex/alternation'
|
10
|
+
require_relative './regex/non_capturing_group'
|
@@ -26,23 +26,35 @@ module SRL
|
|
26
26
|
# Here are all the SRL keywords (in uppercase)
|
27
27
|
@@keywords = %w[
|
28
28
|
AND
|
29
|
+
ANY
|
30
|
+
ANYTHING
|
29
31
|
AT
|
32
|
+
BACKSLASH
|
30
33
|
BETWEEN
|
34
|
+
CHARACTER
|
31
35
|
DIGIT
|
32
36
|
EXACTLY
|
33
37
|
FROM
|
34
38
|
LEAST
|
35
39
|
LETTER
|
40
|
+
LINE
|
41
|
+
LITERALLY
|
36
42
|
MORE
|
37
43
|
NEVER
|
44
|
+
NEW
|
45
|
+
NO
|
38
46
|
NUMBER
|
47
|
+
OF
|
39
48
|
ONCE
|
49
|
+
ONE
|
40
50
|
OPTIONAL
|
41
51
|
OR
|
52
|
+
TAB
|
42
53
|
TIMES
|
43
54
|
TO
|
44
55
|
TWICE
|
45
56
|
UPPERCASE
|
57
|
+
WHITESPACE
|
46
58
|
].map { |x| [x, x] } .to_h
|
47
59
|
|
48
60
|
class ScanError < StandardError; end
|
@@ -68,7 +80,7 @@ module SRL
|
|
68
80
|
def _next_token()
|
69
81
|
skip_whitespaces
|
70
82
|
curr_ch = scanner.peek(1)
|
71
|
-
return nil if curr_ch.nil?
|
83
|
+
return nil if curr_ch.nil? || curr_ch.empty?
|
72
84
|
|
73
85
|
token = nil
|
74
86
|
|
@@ -83,7 +95,13 @@ module SRL
|
|
83
95
|
token = build_token(@@keywords[lexeme.upcase], lexeme)
|
84
96
|
# TODO: handle case unknown identifier
|
85
97
|
elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
|
86
|
-
token = build_token('LETTER_LIT', lexeme)
|
98
|
+
token = build_token('LETTER_LIT', lexeme)
|
99
|
+
elsif (lexeme = scanner.scan(/"([^"]|\\")*"/)) # Double quotes literal?
|
100
|
+
unquoted = lexeme.gsub(/(^")|("$)/, '')
|
101
|
+
token = build_token('STRING_LIT', unquoted)
|
102
|
+
elsif (lexeme = scanner.scan(/'([^']|\\')*'/)) # Single quotes literal?
|
103
|
+
unquoted = lexeme.gsub(/(^')|('$)/, '')
|
104
|
+
token = build_token('STRING_LIT', unquoted)
|
87
105
|
else # Unknown token
|
88
106
|
erroneous = curr_ch.nil? ? '' : curr_ch
|
89
107
|
sequel = scanner.scan(/.{1,20}/)
|
@@ -96,7 +114,14 @@ module SRL
|
|
96
114
|
|
97
115
|
def build_token(aSymbolName, aLexeme)
|
98
116
|
token_type = name2symbol[aSymbolName]
|
99
|
-
|
117
|
+
begin
|
118
|
+
token = Rley::Lexical::Token.new(aLexeme, token_type)
|
119
|
+
rescue Exception => ex
|
120
|
+
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
121
|
+
raise ex
|
122
|
+
end
|
123
|
+
|
124
|
+
return token
|
100
125
|
end
|
101
126
|
|
102
127
|
def skip_whitespaces()
|
@@ -16,7 +16,6 @@ describe 'Integration tests:' do
|
|
16
16
|
end
|
17
17
|
|
18
18
|
context 'Parsing character ranges:' do
|
19
|
-
|
20
19
|
it "should parse 'letter from ... to ...' syntax" do
|
21
20
|
result = parse('letter from a to f')
|
22
21
|
expect(result).to be_success
|
@@ -56,13 +55,41 @@ describe 'Integration tests:' do
|
|
56
55
|
regexp = regexp_repr(result)
|
57
56
|
expect(regexp.to_str).to eq('[1-4]')
|
58
57
|
end
|
58
|
+
end # context
|
59
|
+
|
60
|
+
context 'Parsing string literals:' do
|
61
|
+
it 'should parse double quotes literal string' do
|
62
|
+
result = parse('literally "hello"')
|
63
|
+
expect(result).to be_success
|
64
|
+
|
65
|
+
regexp = regexp_repr(result)
|
66
|
+
expect(regexp.to_str).to eq('hello')
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'should parse single quotes literal string' do
|
70
|
+
result = parse("literally 'hello'")
|
71
|
+
expect(result).to be_success
|
72
|
+
|
73
|
+
regexp = regexp_repr(result)
|
74
|
+
expect(regexp.to_str).to eq('hello')
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'should escape special characters' do
|
78
|
+
result = parse("literally '.'")
|
79
|
+
expect(result).to be_success
|
80
|
+
|
81
|
+
regexp = regexp_repr(result)
|
82
|
+
expect(regexp.to_str).to eq('\.')
|
83
|
+
end
|
84
|
+
end
|
59
85
|
|
86
|
+
context 'Parsing character classes:' do
|
60
87
|
it "should parse 'digit' syntax" do
|
61
88
|
result = parse('digit')
|
62
89
|
expect(result).to be_success
|
63
90
|
|
64
91
|
regexp = regexp_repr(result)
|
65
|
-
expect(regexp.to_str).to eq('
|
92
|
+
expect(regexp.to_str).to eq('\d')
|
66
93
|
end
|
67
94
|
|
68
95
|
it "should parse 'number' syntax" do
|
@@ -70,9 +97,126 @@ describe 'Integration tests:' do
|
|
70
97
|
expect(result).to be_success
|
71
98
|
|
72
99
|
regexp = regexp_repr(result)
|
73
|
-
expect(regexp.to_str).to eq('
|
100
|
+
expect(regexp.to_str).to eq('\d')
|
101
|
+
end
|
102
|
+
|
103
|
+
it "should parse 'any character' syntax" do
|
104
|
+
result = parse('any character')
|
105
|
+
expect(result).to be_success
|
106
|
+
|
107
|
+
regexp = regexp_repr(result)
|
108
|
+
expect(regexp.to_str).to eq('\w')
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should parse 'no character' syntax" do
|
112
|
+
result = parse('no character')
|
113
|
+
expect(result).to be_success
|
114
|
+
|
115
|
+
regexp = regexp_repr(result)
|
116
|
+
expect(regexp.to_str).to eq('\W')
|
117
|
+
end
|
118
|
+
|
119
|
+
it "should parse 'whitespace' syntax" do
|
120
|
+
result = parse('whitespace')
|
121
|
+
expect(result).to be_success
|
122
|
+
|
123
|
+
regexp = regexp_repr(result)
|
124
|
+
expect(regexp.to_str).to eq('\s')
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should parse 'no whitespace' syntax" do
|
128
|
+
result = parse('no whitespace')
|
129
|
+
expect(result).to be_success
|
130
|
+
|
131
|
+
regexp = regexp_repr(result)
|
132
|
+
expect(regexp.to_str).to eq('\S')
|
74
133
|
end
|
75
134
|
|
135
|
+
it "should parse 'anything' syntax" do
|
136
|
+
result = parse('anything')
|
137
|
+
expect(result).to be_success
|
138
|
+
|
139
|
+
regexp = regexp_repr(result)
|
140
|
+
expect(regexp.to_str).to eq('.')
|
141
|
+
end
|
142
|
+
|
143
|
+
it "should parse 'one of' syntax" do
|
144
|
+
result = parse('one of "._%+-"')
|
145
|
+
expect(result).to be_success
|
146
|
+
|
147
|
+
regexp = regexp_repr(result)
|
148
|
+
# Remark: reference implementation less readable
|
149
|
+
# (escapes more characters than required)
|
150
|
+
expect(regexp.to_str).to eq('[._%+\-]')
|
151
|
+
end
|
152
|
+
end # context
|
153
|
+
|
154
|
+
|
155
|
+
context 'Parsing special character declarations:' do
|
156
|
+
it "should parse 'tab' syntax" do
|
157
|
+
result = parse('tab')
|
158
|
+
expect(result).to be_success
|
159
|
+
|
160
|
+
regexp = regexp_repr(result)
|
161
|
+
expect(regexp.to_str).to eq('\t')
|
162
|
+
end
|
163
|
+
|
164
|
+
it "should parse 'backslash' syntax" do
|
165
|
+
result = parse('backslash')
|
166
|
+
expect(result).to be_success
|
167
|
+
|
168
|
+
regexp = regexp_repr(result)
|
169
|
+
expect(regexp.to_str).to eq('\\')
|
170
|
+
end
|
171
|
+
|
172
|
+
it "should parse 'new line' syntax" do
|
173
|
+
result = parse('new line')
|
174
|
+
expect(result).to be_success
|
175
|
+
|
176
|
+
regexp = regexp_repr(result)
|
177
|
+
expect(regexp.to_str).to eq('\n')
|
178
|
+
end
|
179
|
+
end # context
|
180
|
+
|
181
|
+
context 'Parsing alternations:' do
|
182
|
+
it "should parse 'any of' syntax" do
|
183
|
+
source = 'any of (any character, one of "._%-+")'
|
184
|
+
result = parse(source)
|
185
|
+
expect(result).to be_success
|
186
|
+
|
187
|
+
regexp = regexp_repr(result)
|
188
|
+
expect(regexp.to_str).to eq('(?:\w|[._%\-+])')
|
189
|
+
end
|
190
|
+
end # context
|
191
|
+
|
192
|
+
context 'Parsing concatenation:' do
|
193
|
+
it "should reject dangling comma" do
|
194
|
+
source = 'literally "a",'
|
195
|
+
result = parse(source)
|
196
|
+
expect(result).not_to be_success
|
197
|
+
message_prefix = /Premature end of input after ','/
|
198
|
+
expect(result.failure_reason.message).to match(message_prefix)
|
199
|
+
end
|
200
|
+
|
201
|
+
it "should parse a sequence of patterns" do
|
202
|
+
#
|
203
|
+
# DEBUG When I put a comma at the end ... looping endlessly
|
204
|
+
#
|
205
|
+
source = <<-ENDS
|
206
|
+
any of (any character, one of "._%-+") once or more,
|
207
|
+
literally "@",
|
208
|
+
any of (digit, letter, one of ".-") once or more,
|
209
|
+
literally ".",
|
210
|
+
letter at least 2 times
|
211
|
+
ENDS
|
212
|
+
|
213
|
+
result = parse(source)
|
214
|
+
expect(result).to be_success
|
215
|
+
|
216
|
+
regexp = regexp_repr(result)
|
217
|
+
# SRL expect: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
218
|
+
expect(regexp.to_str).to eq('(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}')
|
219
|
+
end
|
76
220
|
end # context
|
77
221
|
|
78
222
|
context 'Parsing quantifiers:' do
|
@@ -87,19 +231,19 @@ describe 'Integration tests:' do
|
|
87
231
|
end
|
88
232
|
|
89
233
|
it "should parse 'twice' syntax" do
|
90
|
-
result = parse(
|
234
|
+
result = parse('digit twice')
|
91
235
|
expect(result).to be_success
|
92
236
|
|
93
237
|
regexp = regexp_repr(result)
|
94
|
-
expect(regexp.to_str).to eq('
|
238
|
+
expect(regexp.to_str).to eq('\d{2}')
|
95
239
|
end
|
96
240
|
|
97
241
|
it "should parse 'optional' syntax" do
|
98
|
-
result = parse(
|
242
|
+
result = parse('anything optional')
|
99
243
|
expect(result).to be_success
|
100
244
|
|
101
245
|
regexp = regexp_repr(result)
|
102
|
-
expect(regexp.to_str).to eq('
|
246
|
+
expect(regexp.to_str).to eq('.?')
|
103
247
|
end
|
104
248
|
|
105
249
|
it "should parse 'exactly ... times' syntax" do
|
@@ -121,7 +265,6 @@ describe 'Integration tests:' do
|
|
121
265
|
expect(regexp.to_str).to eq('[p-t]{2,4}')
|
122
266
|
end
|
123
267
|
|
124
|
-
|
125
268
|
it "should parse 'once or more' syntax" do
|
126
269
|
result = parse(prefix + 'once or more')
|
127
270
|
expect(result).to be_success
|