rley 0.6.00 → 0.6.01

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -1
  3. data/CHANGELOG.md +3 -0
  4. data/Gemfile +1 -1
  5. data/examples/NLP/benchmark_pico_en.rb +6 -10
  6. data/examples/NLP/nano_eng/nano_en_demo.rb +2 -2
  7. data/examples/NLP/nano_eng/nano_grammar.rb +1 -2
  8. data/examples/data_formats/JSON/json_ast_builder.rb +8 -8
  9. data/examples/general/SRL/lib/ast_builder.rb +74 -72
  10. data/examples/general/SRL/lib/grammar.rb +2 -2
  11. data/examples/general/SRL/lib/regex/abstract_method.rb +28 -28
  12. data/examples/general/SRL/lib/regex/alternation.rb +21 -25
  13. data/examples/general/SRL/lib/regex/anchor.rb +6 -9
  14. data/examples/general/SRL/lib/regex/atomic_expression.rb +10 -15
  15. data/examples/general/SRL/lib/regex/capturing_group.rb +15 -14
  16. data/examples/general/SRL/lib/regex/char_class.rb +10 -13
  17. data/examples/general/SRL/lib/regex/char_range.rb +45 -46
  18. data/examples/general/SRL/lib/regex/char_shorthand.rb +8 -9
  19. data/examples/general/SRL/lib/regex/character.rb +196 -191
  20. data/examples/general/SRL/lib/regex/compound_expression.rb +47 -50
  21. data/examples/general/SRL/lib/regex/concatenation.rb +23 -27
  22. data/examples/general/SRL/lib/regex/expression.rb +53 -56
  23. data/examples/general/SRL/lib/regex/lookaround.rb +23 -20
  24. data/examples/general/SRL/lib/regex/match_option.rb +26 -28
  25. data/examples/general/SRL/lib/regex/monadic_expression.rb +20 -23
  26. data/examples/general/SRL/lib/regex/multiplicity.rb +17 -20
  27. data/examples/general/SRL/lib/regex/non_capturing_group.rb +9 -12
  28. data/examples/general/SRL/lib/regex/polyadic_expression.rb +51 -55
  29. data/examples/general/SRL/lib/regex/quantifiable.rb +14 -20
  30. data/examples/general/SRL/lib/regex/repetition.rb +20 -23
  31. data/examples/general/SRL/lib/regex/wildcard.rb +15 -19
  32. data/examples/general/SRL/lib/regex_repr.rb +1 -1
  33. data/examples/general/SRL/lib/tokenizer.rb +2 -2
  34. data/examples/general/SRL/spec/integration_spec.rb +17 -12
  35. data/examples/general/SRL/spec/regex/character_spec.rb +160 -153
  36. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +27 -31
  37. data/examples/general/SRL/spec/spec_helper.rb +1 -1
  38. data/examples/general/SRL/spec/tokenizer_spec.rb +25 -27
  39. data/examples/general/calc_iter1/calc_ast_builder.rb +10 -10
  40. data/examples/general/calc_iter2/calc_ast_builder.rb +7 -9
  41. data/examples/general/calc_iter2/calc_ast_nodes.rb +5 -6
  42. data/examples/general/calc_iter2/calc_lexer.rb +3 -5
  43. data/examples/general/calc_iter2/spec/calculator_spec.rb +16 -14
  44. data/examples/general/left.rb +8 -8
  45. data/examples/general/right.rb +8 -8
  46. data/lib/rley/constants.rb +1 -1
  47. data/lib/rley/engine.rb +16 -20
  48. data/lib/rley/formatter/json.rb +1 -1
  49. data/lib/rley/gfg/grm_flow_graph.rb +1 -1
  50. data/lib/rley/gfg/item_vertex.rb +6 -5
  51. data/lib/rley/gfg/vertex.rb +3 -3
  52. data/lib/rley/lexical/token.rb +4 -3
  53. data/lib/rley/parse_rep/ast_base_builder.rb +4 -3
  54. data/lib/rley/parse_rep/parse_rep_creator.rb +1 -1
  55. data/lib/rley/parse_rep/parse_tree_builder.rb +3 -2
  56. data/lib/rley/parser/error_reason.rb +1 -1
  57. data/lib/rley/parser/gfg_chart.rb +6 -6
  58. data/lib/rley/parser/gfg_parsing.rb +19 -19
  59. data/lib/rley/parser/parse_entry.rb +3 -3
  60. data/lib/rley/parser/parse_entry_set.rb +1 -1
  61. data/lib/rley/parser/parse_walker_factory.rb +15 -15
  62. data/lib/rley/syntax/grammar.rb +1 -1
  63. data/lib/rley/syntax/grammar_builder.rb +2 -2
  64. data/lib/rley/syntax/production.rb +4 -3
  65. data/lib/rley/syntax/symbol_seq.rb +2 -2
  66. data/spec/rley/base/grm_items_builder_spec.rb +1 -1
  67. data/spec/rley/engine_spec.rb +3 -6
  68. data/spec/rley/formatter/asciitree_spec.rb +0 -1
  69. data/spec/rley/formatter/bracket_notation_spec.rb +0 -1
  70. data/spec/rley/formatter/debug_spec.rb +2 -3
  71. data/spec/rley/gfg/grm_flow_graph_spec.rb +19 -19
  72. data/spec/rley/parse_rep/ast_builder_spec.rb +12 -12
  73. data/spec/rley/parser/gfg_earley_parser_spec.rb +1 -1
  74. data/spec/rley/parser/parse_entry_set_spec.rb +5 -5
  75. data/spec/rley/parser/parse_state_spec.rb +8 -3
  76. data/spec/rley/parser/parse_tracer_spec.rb +3 -1
  77. data/spec/rley/parser/parse_walker_factory_spec.rb +1 -1
  78. data/spec/rley/ptree/parse_tree_node_spec.rb +1 -1
  79. data/spec/rley/syntax/grammar_builder_spec.rb +1 -1
  80. data/spec/rley/syntax/grammar_spec.rb +1 -1
  81. metadata +2 -3
  82. data/spec/rley/support/ast_builder.rb +0 -403
@@ -32,7 +32,7 @@ module SRL
32
32
  rule('pattern' => 'quantifiable').as 'basic_pattern'
33
33
  rule('separator' => 'COMMA').as 'comma_separator'
34
34
  rule('separator' => []).as 'void_separator'
35
- rule('flags' => %[flags separator single_flag]).as 'flag_sequence'
35
+ rule('flags' => %w[flags separator single_flag]).as 'flag_sequence'
36
36
  rule('single_flag' => %w[CASE INSENSITIVE]).as 'case_insensitive'
37
37
  rule('single_flag' => %w[MULTI LINE]).as 'multi_line'
38
38
  rule('single_flag' => %w[ALL LAZY]).as 'all_lazy'
@@ -103,4 +103,4 @@ module SRL
103
103
 
104
104
  # And now build the grammar and make it accessible via a global constant
105
105
  Grammar = builder.grammar
106
- end # module
106
+ end # module
@@ -3,33 +3,33 @@
3
3
  # Mix-in module. Provides the method 'abstract_method' that raises an exception
4
4
  # with an appropriate message when called.
5
5
  module AbstractMethod
6
- public
7
-
8
- # Call this method in the body of your abstract methods.
9
- # Example:
10
- # require 'AbstractMethod'
11
- # class SomeClass
12
- # include AbstractMethod # To add the behaviour from the mix-in module AbstractMethod
13
- # ...
14
- # Consider that SomeClass has an abstract method called 'some_method'
15
- #
16
- # def some_method() abstract_method
17
- # end
18
- def abstract_method()
19
- # Determine the short class name of self
20
- className = self.class.name.split(/::/).last
21
-
22
- # Retrieve the top text line of the call stack
23
- top_line = caller.first
24
-
25
- # Extract the calling method name
26
- callerNameInQuotes = top_line.scan(/`.+?$/).first
27
- callerName = callerNameInQuotes.gsub(/`|'/, '') # Remove enclosing quotes
28
-
29
- # Build the error message
30
- error_message = "The method #{className}##{callerName} is abstract. It should be implemented in subclasses of #{className}."
31
- raise NotImplementedError, error_message
32
- end
6
# Call this method in the body of your abstract methods.
# Example:
#   require 'AbstractMethod'
#   class SomeClass
#     include AbstractMethod # To add the behaviour from the mix-in module
#     ...
#     # Consider that SomeClass has an abstract method called 'some_method'
#     def some_method() abstract_method
#     end
#
# Raises NotImplementedError with a message that names the short class name
# of the receiver and the method that called abstract_method.
def abstract_method()
  # Determine the short class name of self
  class_name = self.class.name.split(/::/).last

  # Identify the calling method from the first frame above this one.
  # caller(1..1) returns an Array, so String#scan cannot be called on it
  # directly; Location#base_label yields the bare method name without
  # having to parse the backtrace text.
  caller_name = caller_locations(1, 1).first.base_label

  # Build the error message
  prefix = "The method #{class_name}##{caller_name} is abstract."
  suffix = " It should be implemented in subclasses of #{class_name}."
  raise NotImplementedError, prefix + suffix
end
33
33
  end # module
34
34
 
35
- # End of file
35
+ # End of file
@@ -1,31 +1,27 @@
1
1
  # File: alternation.rb
2
2
 
3
- require_relative 'polyadic_expression' # Access the superclass
3
+ require_relative 'polyadic_expression' # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
-
7
- # Abstract class. A n-ary matching operator.
8
- # It succeeds when one child expression succeeds to match the subject text
9
- class Alternation < PolyadicExpression
10
-
11
- # Constructor.
12
- def initialize(*theChildren)
13
- super(theChildren)
14
- end
15
-
16
- protected
17
-
18
- # Conversion method re-definition.
19
- # Purpose: Return the String representation of the concatented expressions.
20
- def text_repr()
21
- result_children = children.map { |aChild| aChild.to_str() }
22
- result = '(?:' + result_children.join('|') + ')'
23
-
24
- return result
25
- end
26
-
27
- end # class
28
-
6
+ # Abstract class. An n-ary matching operator.
7
+ # It succeeds when one child expression succeeds to match the subject text
8
+ class Alternation < PolyadicExpression
9
+ # Constructor.
10
+ def initialize(*theChildren)
11
+ super(theChildren)
12
+ end
13
+
14
+ protected
15
+
16
+ # Conversion method re-definition.
17
+ # Purpose: Return the String representation of the alternation of the child expressions.
18
+ def text_repr()
19
+ result_children = children.map(&:to_str)
20
+ result = '(?:' + result_children.join('|') + ')'
21
+
22
+ return result
23
+ end
24
+ end # class
29
25
  end # module
30
26
 
31
- # End of file
27
+ # End of file
@@ -1,6 +1,6 @@
1
1
  # File: anchor.rb
2
2
 
3
- require_relative "atomic_expression" # Access the superclass
3
+ require_relative 'atomic_expression' # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
6
  # An anchor is a zero-width assertion based on the current position.
@@ -8,15 +8,15 @@ module Regex # This module is used as a namespace
8
8
  # A Hash for converting a lexeme to a symbolic value
9
9
  AnchorToSymbol = {
10
10
  # Lexeme => Symbol value
11
- '^' => :soLine, # Start of line
12
- '$' => :eoLine, # End of line
11
+ '^' => :soLine, # Start of line
12
+ '$' => :eoLine, # End of line
13
13
  '\A' => :soSubject,
14
14
  '\b' => :wordBoundary,
15
15
  '\B' => :nonAtWordBoundary,
16
16
  '\G' => :firstMatch,
17
17
  '\z' => :eoSubject,
18
18
  '\Z' => :eoSubjectOrBeforeNLAtEnd
19
- }
19
+ }.freeze
20
20
 
21
21
  # A symbolic value that identifies the type of assertion to perform
22
22
  attr_reader(:kind)
@@ -27,12 +27,10 @@ module Regex # This module is used as a namespace
27
27
  @kind = valid_kind(aKind)
28
28
  end
29
29
 
30
- public
31
-
32
30
  # Conversion method re-definition.
33
31
  # Purpose: Return the String representation of the expression.
34
32
  def to_str()
35
- return AnchorToSymbol.rassoc(kind).first()
33
+ return AnchorToSymbol.rassoc(kind).first
36
34
  end
37
35
 
38
36
  private
@@ -41,8 +39,7 @@ module Regex # This module is used as a namespace
41
39
  def valid_kind(aKind)
42
40
  return AnchorToSymbol[aKind]
43
41
  end
44
-
45
42
  end # class
46
43
  end # module
47
44
 
48
- # End of file
45
+ # End of file
@@ -1,21 +1,16 @@
1
1
  # File: atomic_expression.rb
2
2
 
3
- require_relative "expression" # Access the superclass
3
+ require_relative 'expression' # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
-
7
- # Abstract class. A valid regular expression that
8
- # cannot be further decomposed into sub-expressions.
9
- class AtomicExpression < Expression
10
-
11
- public
12
- # Redefined method. Return true since it may not have any child.
13
- def atomic?
14
- return true
15
- end
16
-
17
- end # class
18
-
6
+ # Abstract class. A valid regular expression that
7
+ # cannot be further decomposed into sub-expressions.
8
+ class AtomicExpression < Expression
9
+ # Redefined method. Return true since it may not have any child.
10
+ def atomic?
11
+ return true
12
+ end
13
+ end # class
19
14
  end # module
20
15
 
21
- # End of file
16
+ # End of file
@@ -1,30 +1,32 @@
1
1
  # File: capturing_group.rb
2
2
 
3
- require_relative "monadic_expression" # Access the superclass
3
+ require_relative 'monadic_expression' # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
-
7
6
  # An association between a capture variable and an expression
8
7
  # the subject text in the same serial arrangement
9
8
  class CapturingGroup < MonadicExpression
10
- # The capture variable id. It is a Fixnum when the capture group gets a sequence number,
9
+ # The capture variable id. It is a Fixnum when the capture group gets
10
+ # a sequence number,
11
11
  # a String when it is a user-defined name
12
12
  attr_reader(:id)
13
-
14
- # When true, then capturing group forbids backtracking requests from its parent expression.
13
+
14
+ # When true, then capturing group forbids backtracking requests from its parent
15
+ # expression.
15
16
  attr_reader(:no_backtrack)
16
-
17
+
17
18
  # Constructor.
18
- # [aChildExpression] A sub-expression to match. When successful the matching text is assigned to the capture variable.
19
+ # [aChildExpression] A sub-expression to match. When successful
20
+ # the matching text is assigned to the capture variable.
19
21
  # [theId] The id of the capture variable.
20
- # [noBacktrack] A flag that specifies whether the capturing group forbids backtracking requests from its parent expression.
22
+ # [noBacktrack] A flag that specifies whether the capturing group forbids
23
+ # backtracking requests from its parent expression.
21
24
  def initialize(aChildExpression, theId = nil, noBacktrack = false)
22
25
  super(aChildExpression)
23
26
  @id = theId
24
27
  @no_backtrack = noBacktrack
25
28
  end
26
-
27
- public
29
+
28
30
  # Return true iff the capturing group has a name (and not a sequence number)
29
31
  def named?()
30
32
  return id.kind_of?(String)
@@ -37,14 +39,13 @@ module Regex # This module is used as a namespace
37
39
  atomic = no_backtrack ? '?>' : ''
38
40
  if child.is_a?(Regex::NonCapturingGroup)
39
41
  # Minor optimization
40
- result = '(' + atomic + prefix + child.child.to_str + ")"
42
+ result = '(' + atomic + prefix + child.child.to_str + ')'
41
43
  else
42
- result = '(' + atomic + prefix + child.to_str + ")"
44
+ result = '(' + atomic + prefix + child.to_str + ')'
43
45
  end
44
46
  return result
45
47
  end
46
-
47
48
  end # class
48
49
  end # module
49
50
 
50
- # End of file
51
+ # End of file
@@ -1,41 +1,38 @@
1
1
  # File: char_class.rb
2
2
 
3
- require_relative "polyadic_expression" # Access the superclass
3
+ require_relative 'polyadic_expression' # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
-
7
- # Abstract class. A n-ary matching operator.
6
+ # Abstract class. An n-ary matching operator.
8
7
  # It succeeds when one child expression succeeds to match the subject text.
9
8
  class CharClass < PolyadicExpression
10
9
  # These are characters with special meaning in character classes
11
10
  Metachars = ']\^-'.codepoints
12
11
  # A flag that indicates whether the character is negated
13
12
  attr_reader(:negated)
14
-
13
+
15
14
  # Constructor.
16
- def initialize(to_negate,*theChildren)
15
+ def initialize(to_negate, *theChildren)
17
16
  super(theChildren)
18
17
  @negated = to_negate
19
18
  end
20
19
 
21
20
  protected
22
-
21
+
23
22
  # Conversion method re-definition.
24
23
  # Purpose: Return the String representation of the character class.
25
24
  def text_repr()
26
- result_children = children.inject('') do |subResult, aChild|
25
+ result_children = children.inject('') do |subResult, aChild|
27
26
  if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
28
27
  subResult << "\\" # Escape meta-character...
29
28
  end
30
- subResult << aChild.to_str()
29
+ subResult << aChild.to_str
31
30
  end
32
- result = '['+ (negated ? '^' : '') + result_children + ']'
33
-
31
+ result = '[' + (negated ? '^' : '') + result_children + ']'
32
+
34
33
  return result
35
34
  end
36
-
37
35
  end # class
38
-
39
36
  end # module
40
37
 
41
- # End of file
38
+ # End of file
@@ -1,52 +1,51 @@
1
1
  # File: char_range.rb
2
2
 
3
- require_relative 'polyadic_expression' # Access the superclass
3
+ require_relative 'polyadic_expression' # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
-
7
- # A binary expression that represents a contiguous range of characters.
8
- # Assumption: characters are ordered by codepoint
9
- class CharRange < PolyadicExpression
10
-
11
- # Constructor.
12
- # [thelowerBound] A character that will be the lower bound value for the range.
13
- # [theUpperBound] A character that will be the upper bound value for the range.
14
- # TODO: optimisation. Build a Character if lower bound == upper bound.
15
- def initialize(theLowerBound, theUpperBound)
16
- range = validated_range(theLowerBound, theUpperBound)
17
- super(range)
18
- end
19
-
20
- public
21
- # Return the lower bound of the range.
22
- def lower()
23
- return children.first
24
- end
25
-
26
- # Return the upper bound of the range.
27
- def upper()
28
- return children.last
29
- end
30
-
31
- protected
32
-
33
- # Conversion method re-definition.
34
- # Purpose: Return the String representation of the concatented expressions.
35
- def text_repr()
36
- result = lower.to_str() + '-' + upper.to_str()
37
-
38
- return result
39
- end
40
-
41
- private
42
- # Validation method. Returns a couple of Characters.after their validation.
43
- def validated_range(theLowerBound, theUpperBound)
44
- raise StandardError, "Character range error: lower bound is greater than upper bound." if theLowerBound.codepoint > theUpperBound.codepoint
45
- return [theLowerBound, theUpperBound]
46
- end
47
-
48
- end # class
49
-
6
+ # A binary expression that represents a contiguous range of characters.
7
+ # Assumption: characters are ordered by codepoint
8
+ class CharRange < PolyadicExpression
9
+ # Constructor.
10
+ # [theLowerBound]
11
+ # A character that will be the lower bound value for the range.
12
+ # [theUpperBound]
13
+ # A character that will be the upper bound value for the range.
14
+ # TODO: optimisation. Build a Character if lower bound == upper bound.
15
+ def initialize(theLowerBound, theUpperBound)
16
+ range = validated_range(theLowerBound, theUpperBound)
17
+ super(range)
18
+ end
19
+
20
+ # Return the lower bound of the range.
21
+ def lower()
22
+ return children.first
23
+ end
24
+
25
+ # Return the upper bound of the range.
26
+ def upper()
27
+ return children.last
28
+ end
29
+
30
+ protected
31
+
32
+ # Conversion method re-definition.
33
+ # Purpose: Return the String representation of the character range.
34
+ def text_repr()
35
+ result = lower.to_str + '-' + upper.to_str
36
+
37
+ return result
38
+ end
39
+
40
+ private
41
+
42
+ # Validation method. Returns the pair of Characters after their validation.
43
+ def validated_range(theLowerBound, theUpperBound)
44
+ msg = 'Character range error: lower bound is greater than upper bound.'
45
+ raise StandardError, msg if theLowerBound.codepoint > theUpperBound.codepoint
46
+ return [theLowerBound, theUpperBound]
47
+ end
48
+ end # class
50
49
  end # module
51
50
 
52
- # End of file
51
+ # End of file
@@ -1,9 +1,8 @@
1
1
  # File: char_shorthand.rb
2
2
 
3
- require_relative "atomic_expression" # Access the superclass
3
+ require_relative 'atomic_expression' # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
-
7
6
  # A pre-defined character class is in essence a name for a built-in, standard character class.
8
7
  class CharShorthand < AtomicExpression
9
8
  # A constant Hash that defines all the predefined character shorthands.
@@ -18,7 +17,7 @@ module Regex # This module is used as a namespace
18
17
  'S' => '[^ \t\r\n\f]',
19
18
  'w' => '[0-9a-zA-Z_]',
20
19
  'W' => '[^0-9a-zA-Z_]'
21
- }
20
+ }.freeze
22
21
 
23
22
  # A one-letter abbreviation
24
23
  attr_reader(:shortname)
@@ -29,23 +28,23 @@ module Regex # This module is used as a namespace
29
28
  end
30
29
 
31
30
  protected
32
-
31
+
33
32
  # Conversion method re-definition.
34
33
  # Purpose: Return the String representation of the expression.
35
34
  def text_repr()
36
35
  return "\\#{shortname}"
37
36
  end
38
37
 
39
- private
38
+ private
39
+
40
40
  # Return the validated short name.
41
41
  def valid_shortname(aShortname)
42
- raise StandardError, "Unknown predefined character class \\#{aShortname}" unless StandardCClasses.include? aShortname
42
+ msg = "Unknown predefined character class \\#{aShortname}"
43
+ raise StandardError, msg unless StandardCClasses.include? aShortname
43
44
 
44
45
  return aShortname
45
46
  end
46
-
47
47
  end # class
48
-
49
48
  end # module
50
49
 
51
- # End of file
50
+ # End of file