rley 0.6.00 → 0.6.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -1
- data/CHANGELOG.md +3 -0
- data/Gemfile +1 -1
- data/examples/NLP/benchmark_pico_en.rb +6 -10
- data/examples/NLP/nano_eng/nano_en_demo.rb +2 -2
- data/examples/NLP/nano_eng/nano_grammar.rb +1 -2
- data/examples/data_formats/JSON/json_ast_builder.rb +8 -8
- data/examples/general/SRL/lib/ast_builder.rb +74 -72
- data/examples/general/SRL/lib/grammar.rb +2 -2
- data/examples/general/SRL/lib/regex/abstract_method.rb +28 -28
- data/examples/general/SRL/lib/regex/alternation.rb +21 -25
- data/examples/general/SRL/lib/regex/anchor.rb +6 -9
- data/examples/general/SRL/lib/regex/atomic_expression.rb +10 -15
- data/examples/general/SRL/lib/regex/capturing_group.rb +15 -14
- data/examples/general/SRL/lib/regex/char_class.rb +10 -13
- data/examples/general/SRL/lib/regex/char_range.rb +45 -46
- data/examples/general/SRL/lib/regex/char_shorthand.rb +8 -9
- data/examples/general/SRL/lib/regex/character.rb +196 -191
- data/examples/general/SRL/lib/regex/compound_expression.rb +47 -50
- data/examples/general/SRL/lib/regex/concatenation.rb +23 -27
- data/examples/general/SRL/lib/regex/expression.rb +53 -56
- data/examples/general/SRL/lib/regex/lookaround.rb +23 -20
- data/examples/general/SRL/lib/regex/match_option.rb +26 -28
- data/examples/general/SRL/lib/regex/monadic_expression.rb +20 -23
- data/examples/general/SRL/lib/regex/multiplicity.rb +17 -20
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +9 -12
- data/examples/general/SRL/lib/regex/polyadic_expression.rb +51 -55
- data/examples/general/SRL/lib/regex/quantifiable.rb +14 -20
- data/examples/general/SRL/lib/regex/repetition.rb +20 -23
- data/examples/general/SRL/lib/regex/wildcard.rb +15 -19
- data/examples/general/SRL/lib/regex_repr.rb +1 -1
- data/examples/general/SRL/lib/tokenizer.rb +2 -2
- data/examples/general/SRL/spec/integration_spec.rb +17 -12
- data/examples/general/SRL/spec/regex/character_spec.rb +160 -153
- data/examples/general/SRL/spec/regex/multiplicity_spec.rb +27 -31
- data/examples/general/SRL/spec/spec_helper.rb +1 -1
- data/examples/general/SRL/spec/tokenizer_spec.rb +25 -27
- data/examples/general/calc_iter1/calc_ast_builder.rb +10 -10
- data/examples/general/calc_iter2/calc_ast_builder.rb +7 -9
- data/examples/general/calc_iter2/calc_ast_nodes.rb +5 -6
- data/examples/general/calc_iter2/calc_lexer.rb +3 -5
- data/examples/general/calc_iter2/spec/calculator_spec.rb +16 -14
- data/examples/general/left.rb +8 -8
- data/examples/general/right.rb +8 -8
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +16 -20
- data/lib/rley/formatter/json.rb +1 -1
- data/lib/rley/gfg/grm_flow_graph.rb +1 -1
- data/lib/rley/gfg/item_vertex.rb +6 -5
- data/lib/rley/gfg/vertex.rb +3 -3
- data/lib/rley/lexical/token.rb +4 -3
- data/lib/rley/parse_rep/ast_base_builder.rb +4 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +1 -1
- data/lib/rley/parse_rep/parse_tree_builder.rb +3 -2
- data/lib/rley/parser/error_reason.rb +1 -1
- data/lib/rley/parser/gfg_chart.rb +6 -6
- data/lib/rley/parser/gfg_parsing.rb +19 -19
- data/lib/rley/parser/parse_entry.rb +3 -3
- data/lib/rley/parser/parse_entry_set.rb +1 -1
- data/lib/rley/parser/parse_walker_factory.rb +15 -15
- data/lib/rley/syntax/grammar.rb +1 -1
- data/lib/rley/syntax/grammar_builder.rb +2 -2
- data/lib/rley/syntax/production.rb +4 -3
- data/lib/rley/syntax/symbol_seq.rb +2 -2
- data/spec/rley/base/grm_items_builder_spec.rb +1 -1
- data/spec/rley/engine_spec.rb +3 -6
- data/spec/rley/formatter/asciitree_spec.rb +0 -1
- data/spec/rley/formatter/bracket_notation_spec.rb +0 -1
- data/spec/rley/formatter/debug_spec.rb +2 -3
- data/spec/rley/gfg/grm_flow_graph_spec.rb +19 -19
- data/spec/rley/parse_rep/ast_builder_spec.rb +12 -12
- data/spec/rley/parser/gfg_earley_parser_spec.rb +1 -1
- data/spec/rley/parser/parse_entry_set_spec.rb +5 -5
- data/spec/rley/parser/parse_state_spec.rb +8 -3
- data/spec/rley/parser/parse_tracer_spec.rb +3 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +1 -1
- data/spec/rley/ptree/parse_tree_node_spec.rb +1 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +1 -1
- data/spec/rley/syntax/grammar_spec.rb +1 -1
- metadata +2 -3
- data/spec/rley/support/ast_builder.rb +0 -403
@@ -32,7 +32,7 @@ module SRL
|
|
32
32
|
rule('pattern' => 'quantifiable').as 'basic_pattern'
|
33
33
|
rule('separator' => 'COMMA').as 'comma_separator'
|
34
34
|
rule('separator' => []).as 'void_separator'
|
35
|
-
rule('flags' => %[flags separator single_flag]).as 'flag_sequence'
|
35
|
+
rule('flags' => %w[flags separator single_flag]).as 'flag_sequence'
|
36
36
|
rule('single_flag' => %w[CASE INSENSITIVE]).as 'case_insensitive'
|
37
37
|
rule('single_flag' => %w[MULTI LINE]).as 'multi_line'
|
38
38
|
rule('single_flag' => %w[ALL LAZY]).as 'all_lazy'
|
@@ -103,4 +103,4 @@ module SRL
|
|
103
103
|
|
104
104
|
# And now build the grammar and make it accessible via a global constant
|
105
105
|
Grammar = builder.grammar
|
106
|
-
end # module
|
106
|
+
end # module
|
@@ -3,33 +3,33 @@
|
|
3
3
|
# Mix-in module. Provides the method 'abstract_method' that raises an exception
|
4
4
|
# with an appropriate message when called.
|
5
5
|
module AbstractMethod
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
6
|
+
# Call this method in the body of your abstract methods.
|
7
|
+
# Example:
|
8
|
+
# require 'AbstractMethod'
|
9
|
+
# class SomeClass
|
10
|
+
# include AbstractMethod # To add the behaviour from the mix-in module AbstractMethod
|
11
|
+
# ...
|
12
|
+
# Consider that SomeClass has an abstract method called 'some_method'
|
13
|
+
#
|
14
|
+
# def some_method() abstract_method
|
15
|
+
# end
|
16
|
+
def abstract_method()
|
17
|
+
# Determine the short class name of self
|
18
|
+
className = self.class.name.split(/::/).last
|
19
|
+
|
20
|
+
# Retrieve the top text line of the call stack
|
21
|
+
top_line = caller(1..1)
|
22
|
+
|
23
|
+
# Extract the calling method name
|
24
|
+
callerNameInQuotes = top_line.scan(/`.+?$/).first
|
25
|
+
callerName = callerNameInQuotes.gsub(/`|'/, '') # Remove enclosing quotes
|
26
|
+
|
27
|
+
# Build the error message
|
28
|
+
prefix = "The method #{className}##{callerName} is abstract."
|
29
|
+
suffix = " It should be implemented in subclasses of #{className}."
|
30
|
+
error_message = prefix + suffix
|
31
|
+
raise NotImplementedError, error_message
|
32
|
+
end
|
33
33
|
end # module
|
34
34
|
|
35
|
-
# End of file
|
35
|
+
# End of file
|
@@ -1,31 +1,27 @@
|
|
1
1
|
# File: alternation.rb
|
2
2
|
|
3
|
-
require_relative 'polyadic_expression'
|
3
|
+
require_relative 'polyadic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
-
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
end # class
|
28
|
-
|
6
|
+
# Abstract class. An n-ary matching operator.
|
7
|
+
# It succeeds when one child expression succeeds to match the subject text
|
8
|
+
class Alternation < PolyadicExpression
|
9
|
+
# Constructor.
|
10
|
+
def initialize(*theChildren)
|
11
|
+
super(theChildren)
|
12
|
+
end
|
13
|
+
|
14
|
+
protected
|
15
|
+
|
16
|
+
# Conversion method re-definition.
|
17
|
+
# Purpose: Return the String representation of the alternation of the child expressions.
|
18
|
+
def text_repr()
|
19
|
+
result_children = children.map(&:to_str)
|
20
|
+
result = '(?:' + result_children.join('|') + ')'
|
21
|
+
|
22
|
+
return result
|
23
|
+
end
|
24
|
+
end # class
|
29
25
|
end # module
|
30
26
|
|
31
|
-
# End of file
|
27
|
+
# End of file
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# File: anchor.rb
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'atomic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
6
|
# An anchor is a zero-width assertion based on the current position.
|
@@ -8,15 +8,15 @@ module Regex # This module is used as a namespace
|
|
8
8
|
# A Hash for converting a lexeme to a symbolic value
|
9
9
|
AnchorToSymbol = {
|
10
10
|
# Lexeme => Symbol value
|
11
|
-
'^' => :soLine,
|
12
|
-
'$' => :eoLine,
|
11
|
+
'^' => :soLine, # Start of line
|
12
|
+
'$' => :eoLine, # End of line
|
13
13
|
'\A' => :soSubject,
|
14
14
|
'\b' => :wordBoundary,
|
15
15
|
'\B' => :nonAtWordBoundary,
|
16
16
|
'\G' => :firstMatch,
|
17
17
|
'\z' => :eoSubject,
|
18
18
|
'\Z' => :eoSubjectOrBeforeNLAtEnd
|
19
|
-
}
|
19
|
+
}.freeze
|
20
20
|
|
21
21
|
# A symbolic value that identifies the type of assertion to perform
|
22
22
|
attr_reader(:kind)
|
@@ -27,12 +27,10 @@ module Regex # This module is used as a namespace
|
|
27
27
|
@kind = valid_kind(aKind)
|
28
28
|
end
|
29
29
|
|
30
|
-
public
|
31
|
-
|
32
30
|
# Conversion method re-definition.
|
33
31
|
# Purpose: Return the String representation of the expression.
|
34
32
|
def to_str()
|
35
|
-
return AnchorToSymbol.rassoc(kind).first
|
33
|
+
return AnchorToSymbol.rassoc(kind).first
|
36
34
|
end
|
37
35
|
|
38
36
|
private
|
@@ -41,8 +39,7 @@ module Regex # This module is used as a namespace
|
|
41
39
|
def valid_kind(aKind)
|
42
40
|
return AnchorToSymbol[aKind]
|
43
41
|
end
|
44
|
-
|
45
42
|
end # class
|
46
43
|
end # module
|
47
44
|
|
48
|
-
# End of file
|
45
|
+
# End of file
|
@@ -1,21 +1,16 @@
|
|
1
1
|
# File: atomic_expression.rb
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
-
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
return true
|
15
|
-
end
|
16
|
-
|
17
|
-
end # class
|
18
|
-
|
6
|
+
# Abstract class. A valid regular expression that
|
7
|
+
# cannot be further decomposed into sub-expressions.
|
8
|
+
class AtomicExpression < Expression
|
9
|
+
# Redefined method. Return true since it may not have any child.
|
10
|
+
def atomic?
|
11
|
+
return true
|
12
|
+
end
|
13
|
+
end # class
|
19
14
|
end # module
|
20
15
|
|
21
|
-
# End of file
|
16
|
+
# End of file
|
@@ -1,30 +1,32 @@
|
|
1
1
|
# File: capturing_group.rb
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'monadic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
-
|
7
6
|
# An association between a capture variable and an expression
|
8
7
|
# the subject text in the same serial arrangement
|
9
8
|
class CapturingGroup < MonadicExpression
|
10
|
-
# The capture variable id. It is a Fixnum when the capture group gets
|
9
|
+
# The capture variable id. It is a Fixnum when the capture group gets
|
10
|
+
# a sequence number,
|
11
11
|
# a String when it is a user-defined name
|
12
12
|
attr_reader(:id)
|
13
|
-
|
14
|
-
# When true, then capturing group forbids backtracking requests from its parent
|
13
|
+
|
14
|
+
# When true, then capturing group forbids backtracking requests from its parent
|
15
|
+
# expression.
|
15
16
|
attr_reader(:no_backtrack)
|
16
|
-
|
17
|
+
|
17
18
|
# Constructor.
|
18
|
-
# [aChildExpression]
|
19
|
+
# [aChildExpression] A sub-expression to match. When successful
|
20
|
+
# the matching text is assigned to the capture variable.
|
19
21
|
# [theId] The id of the capture variable.
|
20
|
-
# [noBacktrack] A flag that specifies whether the capturing group forbids
|
22
|
+
# [noBacktrack] A flag that specifies whether the capturing group forbids
|
23
|
+
# backtracking requests from its parent expression.
|
21
24
|
def initialize(aChildExpression, theId = nil, noBacktrack = false)
|
22
25
|
super(aChildExpression)
|
23
26
|
@id = theId
|
24
27
|
@no_backtrack = noBacktrack
|
25
28
|
end
|
26
|
-
|
27
|
-
public
|
29
|
+
|
28
30
|
# Return true iff the capturing group has a name (and not a sequence number)
|
29
31
|
def named?()
|
30
32
|
return id.kind_of?(String)
|
@@ -37,14 +39,13 @@ module Regex # This module is used as a namespace
|
|
37
39
|
atomic = no_backtrack ? '?>' : ''
|
38
40
|
if child.is_a?(Regex::NonCapturingGroup)
|
39
41
|
# Minor optimization
|
40
|
-
result = '(' + atomic + prefix + child.child.to_str +
|
42
|
+
result = '(' + atomic + prefix + child.child.to_str + ')'
|
41
43
|
else
|
42
|
-
result = '(' + atomic + prefix + child.to_str +
|
44
|
+
result = '(' + atomic + prefix + child.to_str + ')'
|
43
45
|
end
|
44
46
|
return result
|
45
47
|
end
|
46
|
-
|
47
48
|
end # class
|
48
49
|
end # module
|
49
50
|
|
50
|
-
# End of file
|
51
|
+
# End of file
|
@@ -1,41 +1,38 @@
|
|
1
1
|
# File: char_class.rb
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'polyadic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
-
|
7
|
-
# Abstract class. A n-ary matching operator.
|
6
|
+
# Abstract class. An n-ary matching operator.
|
8
7
|
# It succeeds when one child expression succeeds to match the subject text.
|
9
8
|
class CharClass < PolyadicExpression
|
10
9
|
# These are characters with special meaning in character classes
|
11
10
|
Metachars = ']\^-'.codepoints
|
12
11
|
# A flag that indicates whether the character class is negated
|
13
12
|
attr_reader(:negated)
|
14
|
-
|
13
|
+
|
15
14
|
# Constructor.
|
16
|
-
def initialize(to_negate
|
15
|
+
def initialize(to_negate, *theChildren)
|
17
16
|
super(theChildren)
|
18
17
|
@negated = to_negate
|
19
18
|
end
|
20
19
|
|
21
20
|
protected
|
22
|
-
|
21
|
+
|
23
22
|
# Conversion method re-definition.
|
24
23
|
# Purpose: Return the String representation of the character class.
|
25
24
|
def text_repr()
|
26
|
-
result_children = children.inject('') do |subResult, aChild|
|
25
|
+
result_children = children.inject('') do |subResult, aChild|
|
27
26
|
if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
|
28
27
|
subResult << "\\" # Escape meta-character...
|
29
28
|
end
|
30
|
-
subResult << aChild.to_str
|
29
|
+
subResult << aChild.to_str
|
31
30
|
end
|
32
|
-
result = '['+ (negated ? '^' : '')
|
33
|
-
|
31
|
+
result = '[' + (negated ? '^' : '') + result_children + ']'
|
32
|
+
|
34
33
|
return result
|
35
34
|
end
|
36
|
-
|
37
35
|
end # class
|
38
|
-
|
39
36
|
end # module
|
40
37
|
|
41
|
-
# End of file
|
38
|
+
# End of file
|
@@ -1,52 +1,51 @@
|
|
1
1
|
# File: char_range.rb
|
2
2
|
|
3
|
-
require_relative 'polyadic_expression'
|
3
|
+
require_relative 'polyadic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
-
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
end # class
|
49
|
-
|
6
|
+
# A binary expression that represents a contiguous range of characters.
|
7
|
+
# Assumption: characters are ordered by codepoint
|
8
|
+
class CharRange < PolyadicExpression
|
9
|
+
# Constructor.
|
10
|
+
# [theLowerBound]
|
11
|
+
# A character that will be the lower bound value for the range.
|
12
|
+
# [theUpperBound]
|
13
|
+
# A character that will be the upper bound value for the range.
|
14
|
+
# TODO: optimisation. Build a Character if lower bound == upper bound.
|
15
|
+
def initialize(theLowerBound, theUpperBound)
|
16
|
+
range = validated_range(theLowerBound, theUpperBound)
|
17
|
+
super(range)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Return the lower bound of the range.
|
21
|
+
def lower()
|
22
|
+
return children.first
|
23
|
+
end
|
24
|
+
|
25
|
+
# Return the upper bound of the range.
|
26
|
+
def upper()
|
27
|
+
return children.last
|
28
|
+
end
|
29
|
+
|
30
|
+
protected
|
31
|
+
|
32
|
+
# Conversion method re-definition.
|
33
|
+
# Purpose: Return the String representation of the character range.
|
34
|
+
def text_repr()
|
35
|
+
result = lower.to_str + '-' + upper.to_str
|
36
|
+
|
37
|
+
return result
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
# Validation method. Returns a pair of Characters after their validation.
|
43
|
+
def validated_range(theLowerBound, theUpperBound)
|
44
|
+
msg = 'Character range error: lower bound is greater than upper bound.'
|
45
|
+
raise StandardError, msg if theLowerBound.codepoint > theUpperBound.codepoint
|
46
|
+
return [theLowerBound, theUpperBound]
|
47
|
+
end
|
48
|
+
end # class
|
50
49
|
end # module
|
51
50
|
|
52
|
-
# End of file
|
51
|
+
# End of file
|
@@ -1,9 +1,8 @@
|
|
1
1
|
# File: char_shorthand.rb
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'atomic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
-
|
7
6
|
# A pre-defined character class is in essence a name for a built-in, standard character class.
|
8
7
|
class CharShorthand < AtomicExpression
|
9
8
|
# A constant Hash that defines all the predefined character shorthands.
|
@@ -18,7 +17,7 @@ module Regex # This module is used as a namespace
|
|
18
17
|
'S' => '[^ \t\r\n\f]',
|
19
18
|
'w' => '[0-9a-zA-Z_]',
|
20
19
|
'W' => '[^0-9a-zA-Z_]'
|
21
|
-
}
|
20
|
+
}.freeze
|
22
21
|
|
23
22
|
# A one-letter abbreviation
|
24
23
|
attr_reader(:shortname)
|
@@ -29,23 +28,23 @@ module Regex # This module is used as a namespace
|
|
29
28
|
end
|
30
29
|
|
31
30
|
protected
|
32
|
-
|
31
|
+
|
33
32
|
# Conversion method re-definition.
|
34
33
|
# Purpose: Return the String representation of the expression.
|
35
34
|
def text_repr()
|
36
35
|
return "\\#{shortname}"
|
37
36
|
end
|
38
37
|
|
39
|
-
|
38
|
+
private
|
39
|
+
|
40
40
|
# Return the validated short name.
|
41
41
|
def valid_shortname(aShortname)
|
42
|
-
|
42
|
+
msg = "Unknown predefined character class \\#{aShortname}"
|
43
|
+
raise StandardError, msg unless StandardCClasses.include? aShortname
|
43
44
|
|
44
45
|
return aShortname
|
45
46
|
end
|
46
|
-
|
47
47
|
end # class
|
48
|
-
|
49
48
|
end # module
|
50
49
|
|
51
|
-
# End of file
|
50
|
+
# End of file
|