RubyGems - rley - Versions diffs - 0.5.11 → 0.5.12 - Mend

rley 0.5.11 → 0.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/examples/general/SRL/lib/ast_builder.rb +229 -38
data/examples/general/SRL/lib/grammar.rb +39 -7
data/examples/general/SRL/lib/regex/alternation.rb +3 -2
data/examples/general/SRL/lib/regex/anchor.rb +48 -0
data/examples/general/SRL/lib/regex/capturing_group.rb +50 -0
data/examples/general/SRL/lib/regex/char_class.rb +3 -2
data/examples/general/SRL/lib/regex/char_range.rb +4 -2
data/examples/general/SRL/lib/regex/char_shorthand.rb +3 -2
data/examples/general/SRL/lib/regex/character.rb +16 -14
data/examples/general/SRL/lib/regex/concatenation.rb +8 -7
data/examples/general/SRL/lib/regex/expression.rb +26 -5
data/examples/general/SRL/lib/regex/lookaround.rb +47 -0
data/examples/general/SRL/lib/regex/match_option.rb +36 -0
data/examples/general/SRL/lib/regex/non_capturing_group.rb +3 -2
data/examples/general/SRL/lib/regex/repetition.rb +3 -2
data/examples/general/SRL/lib/regex/wildcard.rb +3 -2
data/examples/general/SRL/lib/regex_repr.rb +3 -0
data/examples/general/SRL/lib/tokenizer.rb +26 -7
data/examples/general/SRL/spec/integration_spec.rb +148 -5
data/lib/rley/constants.rb +1 -1
data/lib/rley/parser/parse_tree_builder.rb +1 -1
metadata +6 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0ec06612f299302b861fbaeb04b75c0040a026cf
-  data.tar.gz: d68438efcbacceb2ae4319ac268492e93db35265
+  metadata.gz: 78c5a2a83d1691c6c470f2fb4bf347f7dca44cb6
+  data.tar.gz: 9c1f62cdf775e71e33ceecfc99db58298b4e3b82
 SHA512:
-  metadata.gz: ec3be765a424028c986ea4812cf6f1485f04285beb2b9d8fffc774fc0b61108d4d6758a09a648132562752ab25904fb38f8ee57ecff90d0a70bca253150ed130
-  data.tar.gz: 2463def65eecbefed2bbfffc61e63e88dca2d0498078e83bc742811e540718e95e75f3896fa31b5bdc9068f5420906f389615470a86831dbcb5025824645775d
+  metadata.gz: 662e52aa9aae198f6eb5e9ed1750a1bfa873969ad9bedea4ca7225babef3d9fa648b709c4bef4959432d62cf8e1733d556ce911efec9885a0d59e6d4972c0f50
+  data.tar.gz: ab114da248a85b5e78fdb2ba9affca357a52b0cbe45cbaa97b1ad41becf71b0fa89db6e1f4487c0a23c59b9b9b83db5101e511f2b711a0d4f9a324ac2c3f3a2d

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,12 @@
+### 0.5.12 / 2018-02-03
+* [CHANGE] Simple Regex Language is fully supported!...
+* [CHANGE] File `examples/general/SRL/lib/grammar.rb`: added the missing rule productions for Simple Regex Language.
+* [CHANGE] File `examples/general/SRL/lib/ast_builder.rb`: added transformation rules for the missing regular expression features.
+* [CHANGE] File `examples/general/SRL/spec/integration_spec.rb`: added tests covering SRL.
+* [NEW] File `examples/general/SRL/lib/regex/anchor.rb`: added support for anchors in regular expressions.
+* [NEW] File `examples/general/SRL/lib/regex/capturing_group.rb`: added support for capturing groups in regular expressions.
+* [NEW] File `examples/general/SRL/lib/regex/lookaround.rb`: added support for lookaround in regular expressions.
 ### 0.5.11 / 2018-01-25
 * [NEW] File `left.rb` added in `examples/general` folder for showing use of left-recursive rules.
 * [NEW] File `right.rb` added in `examples/general` folder for showing use of right-recursive rules (less performant).

data/examples/general/SRL/lib/ast_builder.rb CHANGED Viewed

@@ -14,6 +14,8 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
   Terminal2NodeClass = { }.freeze
+  attr_reader :options
   protected
   # Overriding method.
@@ -36,33 +38,109 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
   # @param theChildren [Array] Children nodes (one per rhs symbol)
   def new_parent_node(aProduction, aRange, theTokens, theChildren)
     node = case aProduction.name
-      when 'srl_0' # rule 'srl' => 'pattern'
+      when 'srl_0' # rule 'srl' => 'expression'
         return_first_child(aRange, theTokens, theChildren)
-      when 'pattern_0' # rule 'pattern' => %w[pattern COMMA quantifiable]
+      when 'expression_0' # rule 'expression' => %w[pattern separator flags]
+        reduce_expression_0(aProduction, aRange, theTokens, theChildren)
+      when 'expression_1' # rule 'expression' => 'pattern'
+        return_first_child(aRange, theTokens, theChildren)
+      when 'pattern_0' # rule 'pattern' => %w[pattern separator quantifiable]
         reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
-      when 'pattern_1' # rule 'pattern' => %w[pattern quantifiable]
-        reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
-      when 'pattern_2' # rule 'pattern' => 'quantifiable'
+      when 'pattern_1' # rule 'pattern' => 'quantifiable'
         return_first_child(aRange, theTokens, theChildren)
-      when 'quantifiable_0' # rule 'quantifiable' => 'term'
+      when 'separator_0' # rule 'separator' => 'COMMA'
         return_first_child(aRange, theTokens, theChildren)
-      when 'quantifiable_1' # rule 'quantifiable' = %w[term quantifier]
+      when 'separator_1' # rule 'separator' => []
+        nil
+      when 'flags_0' # rule 'flags' => %[flags separator single_flag]
+        ### NEW
+        reduce_flags_0(aProduction, aRange, theTokens, theChildren)
+      when 'single_flag_0' # rule 'single_flag' => %w[CASE INSENSITIVE]
+        ### NEW
+        reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
+      when 'single_flag_1' # rule 'single_flag' => %w[MULTI LINE]
+        ### NEW
+        reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
+      when 'single_flag_2' # rule 'single_flag' => %w[ALL LAZY]
+        ### NEW
+        reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
+      # rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
+      when 'quantifiable_0'
+        reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
+      # rule 'quantifiable' => %w[begin_anchor anchorable]
+      when 'quantifiable_1'
         reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
+      # rule 'quantifiable' => %w[anchorable end_anchor]
+      when 'quantifiable_2'
+        reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
+      when 'quantifiable_3' # rule 'quantifiable' => 'anchorable'
+        return_first_child(aRange, theTokens, theChildren)
+      # rule 'begin_anchor' => %w[STARTS WITH]
+      # rule 'begin_anchor' => %w[BEGIN WITH]
+      when 'begin_anchor_0', 'begin_anchor_1'
+        reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
+      when 'end_anchor_0' # rule 'end_anchor' => %w[MUST END]
+        reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
+      when 'anchorable_0' # rule 'anchorable' => 'assertable'
+        return_first_child(aRange, theTokens, theChildren)
+      when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
+        reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
+      when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
+        reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
+      # rule 'assertion' => %w[IF FOLLOWED BY assertable]
+      when 'assertion_0'
+        reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
+      # rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
+      when 'assertion_1'
+        reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
+      # rule 'assertion' => %w[IF ALREADY HAD assertable]
+      when 'assertion_2'
+        reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
+      # rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
+      when 'assertion_3'
+        reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
+      when 'assertable_0' # rule 'assertable' => 'term'
+        return_first_child(aRange, theTokens, theChildren)
+      when 'assertable_1' # rule 'assertable' => %w[term quantifier]
+        reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
       when 'term_0' # rule 'term' => 'atom'
         return_first_child(aRange, theTokens, theChildren)
       when 'term_1' # rule 'term' => 'alternation'
         return_first_child(aRange, theTokens, theChildren)
       when 'term_2' # rule 'term' => 'grouping'
         return_first_child(aRange, theTokens, theChildren)
+      when 'term_3' # rule 'term' => 'capturing_group'
+        return_first_child(aRange, theTokens, theChildren)
       when 'atom_0' # rule 'atom' => 'letter_range'
         return_first_child(aRange, theTokens, theChildren)
@@ -133,19 +211,34 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
       when 'alternation_0'
         reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
-      # rule 'alternatives' => %w[alternatives COMMA quantifiable]
+      # rule 'alternatives' => %w[alternatives separator quantifiable]
       when 'alternatives_0'
         reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
-      # rule 'alternatives' => %w[alternatives quantifiable]
-      when 'alternatives_1'
+      when 'alternatives_1' # rule 'alternatives' => 'quantifiable'
         reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
-      when 'alternatives_2' # rule 'alternatives' => 'quantifiable'
-        reduce_alternatives_2(aProduction, aRange, theTokens, theChildren)
+      when 'grouping_0' # rule 'grouping' => %w[LPAREN pattern RPAREN]
+        reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
+      # rule 'capturing_group' => %w[CAPTURE assertable]
+      when 'capturing_group_0'
+        reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
+      # rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
+      when 'capturing_group_1'
+        reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
+      # rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
+      when 'capturing_group_2'
+        reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
+      # rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
+      when 'capturing_group_3'
+        reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
-      when 'grouping' # rule 'grouping' => %w[LPAREN pattern RPAREN]
-        reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
+      when 'var_name_0' # rule 'var_name' => 'STRING_LIT'
+        return_first_child(aRange, theTokens, theChildren)
       when 'quantifier_0' # rule 'quantifier' => 'ONCE'
         multiplicity(1, 1)
@@ -205,11 +298,11 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
       result = Regex::Concatenation.new(*chars)
     else
         if to_escape && Regex::Character::MetaChars.include?(aString)
-          result = Regex::Concatenation.new(Regex::Character.new("\\"),
+          result = Regex::Concatenation.new(Regex::Character.new("\\"),
             Regex::Character.new(aString))
         else
           result = Regex::Character.new(aString)
-        end
+        end
     end
     return result
@@ -237,21 +330,100 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
   def repetition(expressionToRepeat, aMultiplicity)
     return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
   end
-  # rule 'pattern' => %w[pattern COMMA quantifiable]
+  # rule 'expression' => %w[pattern separator flags]
+  def reduce_expression_0(aProduction, aRange, theTokens, theChildren)
+    @options = theChildren[2] if theChildren[2]
+    return_first_child(aRange, theTokens, theChildren)
+  end
+  # rule 'pattern' => %w[pattern separator quantifiable]
   def reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
     return Regex::Concatenation.new(theChildren[0], theChildren[2])
   end
-  # rule 'pattern' => %w[pattern quantifiable]
-  def reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
-    return Regex::Concatenation.new(theChildren[0], theChildren[1])
+  # rule 'flags' => %[flags separator single_flag]
+  def reduce_flags_0(aProduction, aRange, theTokens, theChildren)
+    theChildren[0] << theChildren[2]
   end
-  # rule 'quantifiable' => %w[term quantifier]
+  # rule 'single_flag' => %w[CASE INSENSITIVE]
+  def reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
+    return [ Regex::MatchOption.new(:IGNORECASE, true) ]
+  end
+  # rule 'single_flag' => %w[MULTI LINE]
+  def reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
+    return [ Regex::MatchOption.new(:MULTILINE, true) ]
+  end
+  # rule 'single_flag' => %w[ALL LAZY]
+  def reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
+    return [ Regex::MatchOption.new(:ALL_LAZY, true) ]
+  end
+  # rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
+  def reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
+    theChildren[1].begin_anchor = theChildren[0]
+    theChildren[1].end_anchor = theChildren[2]
+    return theChildren[1]
+  end
+  # rule 'quantifiable' => %w[begin_anchor anchorable]
   def reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
-    quantifier = theChildren.last
-    term = theChildren.first
+    theChildren[1].begin_anchor = theChildren[0]
+    return theChildren[1]
+  end
+  # rule 'quantifiable' => %w[anchorable end_anchor]
+  def reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
+    theChildren[0].end_anchor = theChildren[1]
+    return theChildren[0]
+  end
+  # rule 'begin_anchor' => %w[STARTS WITH]
+  # rule 'begin_anchor' => %w[BEGIN WITH]
+  def reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
+    return Regex::Anchor.new('^')
+  end
+  # rule 'end_anchor' => %w[MUST END]
+  def reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
+    return Regex::Anchor.new('$')
+  end
+  # rule 'anchorable' => %w[assertable assertion]
+  def reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
+    assertion = theChildren.last
+    assertion.children.unshift(theChildren[0])
+    return assertion
+  end
+  # rule 'assertion' => %w[IF FOLLOWED BY assertable]
+  def reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
+    return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
+  end
+  # rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
+  def reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
+    return Regex::Lookaround.new(theChildren.last, :ahead, :negative)
+  end
+  # rule 'assertion' => %w[IF ALREADY HAD assertable]
+  def reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
+    return Regex::Lookaround.new(theChildren.last, :behind, :positive)
+  end
+  # rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
+  def reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
+    return Regex::Lookaround.new(theChildren.last, :behind, :negative)
+  end
+  # rule 'assertable' => %w[term quantifier]
+  def reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
+    quantifier = theChildren[1]
+    term = theChildren[0]
     repetition(term, quantifier)
   end
@@ -348,37 +520,56 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
     raw_literal = theChildren[-1].token.lexeme.dup
     return string_literal(raw_literal)
   end
   # rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
   def reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
     return Regex::Alternation.new(*theChildren[3])
   end
-  # rule 'alternatives' => %w[alternatives COMMA quantifiable]
+  # rule 'alternatives' => %w[alternatives separator quantifiable]
   def reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
     return theChildren[0] << theChildren[-1]
   end
-  # rule 'alternatives' => %w[alternatives quantifiable]
-  def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
-    return theChildren[0] << theChildren[-1]
-  end
   # rule 'alternatives' => 'quantifiable'
-  def reduce_alternatives_2(aProduction, aRange, theTokens, theChildren)
+  def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
     return [theChildren.last]
   end
   # rule 'grouping' => %w[LPAREN pattern RPAREN]
   def reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
-    return Regex::NonCapturingGroup.new(theChildren[1])
+    return Regex::NonCapturingGroup.new(theChildren[1])
   end
+  # rule 'capturing_group' => %w[CAPTURE assertable]
+  def reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
+    return Regex::CapturingGroup.new(theChildren[1])
+  end
+  # rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
+  def reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
+    group = Regex::CapturingGroup.new(theChildren[1])
+    return Regex::Concatenation.new(group, theChildren[3])
+  end
+  # rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
+  def reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
+    name = theChildren[3].token.lexeme.dup
+    return Regex::CapturingGroup.new(theChildren[1], name)
+  end
+  # rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
+  def reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
+    name = theChildren[3].token.lexeme.dup
+    group = Regex::CapturingGroup.new(theChildren[1], name)
+    return Regex::Concatenation.new(group, theChildren[5])
+  end
   # rule 'quantifier' => %w[EXACTLY count TIMES]
   def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
     count = theChildren[1].token.lexeme.to_i
     multiplicity(count, count)
-  end
+  end
   # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
   def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)

data/examples/general/SRL/lib/grammar.rb CHANGED Viewed

@@ -9,6 +9,8 @@ module SRL
     add_terminals('LPAREN', 'RPAREN', 'COMMA')
     add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
     add_terminals('LITERALLY', 'STRING_LIT')
+    add_terminals('BEGIN', 'STARTS', 'WITH')
+    add_terminals('MUST', 'END')
     add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
     add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
     add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
@@ -17,16 +19,42 @@ module SRL
     add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
     add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
     add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
+    add_terminals('IF', 'FOLLOWED', 'BY', 'NOT')
+    add_terminals('ALREADY', 'HAD')
+    add_terminals('CAPTURE', 'AS', 'UNTIL')
+    add_terminals('CASE', 'INSENSITIVE', 'MULTI', 'ALL')
+    add_terminals('LAZY')
-    rule 'srl' => 'pattern'
-    rule 'pattern' => %w[pattern COMMA quantifiable]
-    rule 'pattern' => %w[pattern quantifiable]
+    rule 'srl' => 'expression'
+    rule 'expression' => %w[pattern separator flags]
+    rule 'expression' => 'pattern'
+    rule 'pattern' => %w[pattern separator quantifiable]
     rule 'pattern' => 'quantifiable'
-    rule 'quantifiable' => 'term'
-    rule 'quantifiable' => %w[term quantifier]
+    rule 'separator' => 'COMMA'
+    rule 'separator' => []
+    rule 'flags' => %[flags separator single_flag]
+    rule 'single_flag' => %w[CASE INSENSITIVE]
+    rule 'single_flag' => %w[MULTI LINE]
+    rule 'single_flag' => %w[ALL LAZY]
+    rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
+    rule 'quantifiable' => %w[begin_anchor anchorable]
+    rule 'quantifiable' => %w[anchorable end_anchor]
+    rule 'quantifiable' => 'anchorable'
+    rule 'begin_anchor' => %w[STARTS WITH]
+    rule 'begin_anchor' => %w[BEGIN WITH]
+    rule 'end_anchor' => %w[MUST END]
+    rule 'anchorable' => 'assertable'
+    rule 'anchorable' => %w[assertable assertion]
+    rule 'assertion' => %w[IF FOLLOWED BY assertable]
+    rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
+    rule 'assertion' => %w[IF ALREADY HAD assertable]
+    rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
+    rule 'assertable' => 'term'
+    rule 'assertable' => %w[term quantifier]
     rule 'term' => 'atom'
     rule 'term' => 'alternation'
     rule 'term' => 'grouping'
+    rule 'term' => 'capturing_group'
     rule 'atom' => 'letter_range'
     rule 'atom' => 'digit_range'
     rule 'atom' => 'character_class'
@@ -49,10 +77,14 @@ module SRL
     rule 'special_char' => %w[NEW LINE]
     rule 'literal' => %w[LITERALLY STRING_LIT]
     rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
-    rule 'alternatives' => %w[alternatives COMMA quantifiable]
-    rule 'alternatives' => %w[alternatives quantifiable]
+    rule 'alternatives' => %w[alternatives separator quantifiable]
     rule 'alternatives' => 'quantifiable'
     rule 'grouping' => %w[LPAREN pattern RPAREN]
+    rule 'capturing_group' => %w[CAPTURE assertable]
+    rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
+    rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
+    rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
+    rule 'var_name' => 'STRING_LIT'
     rule 'quantifier' => 'ONCE'
     rule 'quantifier' => 'TWICE'
     rule 'quantifier' => %w[EXACTLY count TIMES]

data/examples/general/SRL/lib/regex/alternation.rb CHANGED Viewed

@@ -13,10 +13,11 @@ class Alternation < PolyadicExpression
 		super(theChildren)
 	end
-public
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the alternatives, joined by '|' inside a non-capturing group.
-	def to_str()
+	def text_repr()
 		result_children = children.map { |aChild| aChild.to_str() }
 		result =  '(?:' + result_children.join('|') + ')'

data/examples/general/SRL/lib/regex/anchor.rb ADDED Viewed

@@ -0,0 +1,48 @@
+# File: anchor.rb
+require_relative "atomic_expression"	# Access the superclass
+module Regex # This module is used as a namespace
+  # An anchor is a zero-width assertion based on the current position.
+  class Anchor < AtomicExpression
+    # A Hash for converting a lexeme to a symbolic value
+    AnchorToSymbol = {
+      # Lexeme => Symbol value
+      '^' => :soLine,	# Start of line
+      '$' => :eoLine,	# End of line
+      '\A' => :soSubject,
+      '\b' => :wordBoundary,
+      '\B' => :nonAtWordBoundary,
+      '\G' => :firstMatch,
+      '\z' => :eoSubject,
+      '\Z' => :eoSubjectOrBeforeNLAtEnd
+    }
+    # A symbolic value that identifies the type of assertion to perform
+    attr_reader(:kind)
+    # Constructor
+    # @param aKind [String] Lexeme representation of the anchor
+    def initialize(aKind)
+      @kind = valid_kind(aKind)
+    end
+    public
+    # Conversion method re-definition.
+    # Purpose: Return the String representation of the expression.
+    def to_str()
+      return AnchorToSymbol.rassoc(kind).first()
+    end
+    private
+    # Return the symbolic value corresponding to the given lexeme.
+    def valid_kind(aKind)
+      return AnchorToSymbol[aKind]
+    end
+  end # class
+end # module
+# End of file

data/examples/general/SRL/lib/regex/capturing_group.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# File: capturing_group.rb
+require_relative "monadic_expression"	# Access the superclass
+module Regex # This module is used as a namespace
+  # An association between a capture variable and a sub-expression:
+  # when the sub-expression matches, the matching text is assigned to the capture variable.
+  class CapturingGroup < MonadicExpression
+    # The capture variable id. It is a Fixnum when the capture group gets a sequence number,
+    # a String when it is an user-defined name
+    attr_reader(:id)
+    # When true, then capturing group forbids backtracking requests from its parent expression.
+    attr_reader(:no_backtrack)
+    # Constructor.
+    # [aChildExpression]	A sub-expression to match. When successful the matching text is assigned to the capture variable.
+    # [theId] The id of the capture variable.
+    # [noBacktrack] A flag that specifies whether the capturing group forbids backtracking requests from its parent expression.
+    def initialize(aChildExpression, theId = nil, noBacktrack = false)
+      super(aChildExpression)
+      @id = theId
+      @no_backtrack = noBacktrack
+    end
+  public
+    # Return true iff the capturing group has a user-defined name (i.e. its id is a String)
+    def named?()
+      return id.kind_of?(String)
+    end
+    # Conversion method re-definition.
+    # Purpose: Return the String representation of the captured expression.
+    def to_str()
+      prefix = named? ? "?<#{id}>" : ''
+      atomic = no_backtrack ? '?>' : ''
+      if child.is_a?(Regex::NonCapturingGroup)
+        # Minor optimization
+        result = '(' + atomic + prefix + child.child.to_str + ")"
+      else
+        result = '(' + atomic + prefix + child.to_str + ")"
+      end
+      return result
+    end
+  end # class
+end # module
+# End of file

data/examples/general/SRL/lib/regex/char_class.rb CHANGED Viewed

@@ -18,10 +18,11 @@ module Regex # This module is used as a namespace
       @negated = to_negate
     end
-  public
+    protected
     # Conversion method re-definition.
     # Purpose: Return the String representation of the character class.
-    def to_str()
+    def text_repr()
       result_children = children.inject('') do |subResult, aChild|
         if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
           subResult << "\\" # Escape meta-character...

data/examples/general/SRL/lib/regex/char_range.rb CHANGED Viewed

@@ -26,11 +26,13 @@ public
 	# Return the upper bound of the range.
 	def upper()
 		return children.last
-	end
+	end
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the range: lower bound, '-', upper bound.
-	def to_str()
+	def text_repr()
 		result = lower.to_str() + '-' + upper.to_str()
 		return result

data/examples/general/SRL/lib/regex/char_shorthand.rb CHANGED Viewed

@@ -28,10 +28,11 @@ module Regex # This module is used as a namespace
       @shortname = valid_shortname(aShortname)
     end
-  public
+    protected
     # Conversion method re-definition.
     # Purpose: Return the String representation of the expression.
-    def to_str()
+    def text_repr()
       return "\\#{shortname}"
     end

data/examples/general/SRL/lib/regex/character.rb CHANGED Viewed

@@ -106,20 +106,6 @@ public
 		self.class.codepoint2char(@codepoint)
 	end
-	# Conversion method re-definition.
-	# Purpose: Return the String representation of the expression.
-	# If the Character was initially from a text (the lexeme), then the lexeme is returned back.
-	# Otherwise the character corresponding to the codepoint is returned.
-	def to_str()
-		if lexeme.nil?
-			result = char()
-		else
-			result = lexeme.dup()
-		end
-		return result
-	end
 	# Returns true iff this Character and parameter 'another' represent the same character.
 	# [another] any Object. The way the equality is tested depends on the another's class
 	# Example:
@@ -152,6 +138,22 @@ public
 	def explain()
 		return "the character '#{to_str()}'"
 	end
+  protected
+  # Conversion method re-definition.
+	# Purpose: Return the String representation of the expression.
+	# If the Character was initially from a text (the lexeme), then the lexeme is returned back.
+	# Otherwise the character corresponding to the codepoint is returned.
+	def text_repr()
+		if lexeme.nil?
+			result = char()
+		else
+			result = lexeme.dup()
+		end
+		return result
+	end
 private
 	# Conversion method that returns a codepoint for the given two characters (digram) escape sequence.

data/examples/general/SRL/lib/regex/concatenation.rb CHANGED Viewed

@@ -4,24 +4,25 @@ require_relative 'polyadic_expression'	# Access the superclass
 module Regex # This module is used as a namespace
-# Abstract class. A n-ary matching operator.
+# Abstract class. A n-ary matching operator.
 # It succeeds when each child succeeds to match the subject text in the same
 # serial arrangement than defined by this concatenation.
 class Concatenation < PolyadicExpression
 	# Constructor.
 	def initialize(*theChildren)
 		super(theChildren)
 	end
-public
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the concatenated expressions.
-	def to_str()
-		result = children.inject('') { |result, aChild|
+	def text_repr()
+		result = children.inject('') { |result, aChild|
 			result << aChild.to_str()
 		}
 		return result
 	end

data/examples/general/SRL/lib/regex/expression.rb CHANGED Viewed

@@ -5,7 +5,11 @@ require_relative 'abstract_method'
 module Regex # This module is used as a namespace
 # Abstract class. The generalization of any valid regular (sub)expression.
-class Expression
+class Expression
+  attr_accessor :begin_anchor
+  attr_accessor :end_anchor
+  # Constructor
 	def initialize()
 	end
@@ -20,20 +24,37 @@ public
 	def cardinality(theParentOptions) abstract_method
 	end
-protected
 	# Determine the matching options to apply to this object, given the options coming from the parent
 	# and options that are local to this object. Local options take precedence.
-	# [theParentOptions] a Hash of matching options. They are overridden by options with same name
+	# @param theParentOptions [Hash] matching options. They are overridden by options with same name
 	# that are bound to this object.
 	def options(theParentOptions)
 		resulting_options = theParentOptions.merge(@local_options)
 		return resulting_options
 	end
-	# Abstract conversion method.
+	# Template method.
 	# Purpose: Return the String representation of the expression.
-	def to_str() abstract_method
+	def to_str()
+    result = ''
+    result << prefix
+    result << text_repr
+    result << suffix
+    return result
 	end
+  protected
+  def prefix()
+    begin_anchor ? begin_anchor.to_str : ''
+  end
+  def suffix()
+    end_anchor ? end_anchor.to_str : ''
+  end
 end # class

data/examples/general/SRL/lib/regex/lookaround.rb ADDED Viewed

@@ -0,0 +1,47 @@
+# File: lookaround.rb
+########################
+# TODO: make it a binary expression
+########################
+require_relative 'polyadic_expression'	# Access the superclass
+module Regex # This module is used as a namespace
+  # Lookaround is a zero-width assertion just like the start and end of line anchors.
+  # The difference is that lookarounds will actually match characters, but only return the result of the match: match or no match.
+  # That is why they are called "assertions". They do not consume characters from the subject,
+  # but only assert whether a match is possible or not.
+  class Lookaround < PolyadicExpression
+    # The "direction" of the lookaround. Can be ahead or behind. It specifies the relative position of the
+    # expression to match compared to the current 'position' in the subject text.
+    attr_reader(:dir)
+    # The kind indicates whether the assertion is positive (succeeds when there is a match) or negative
+    # (assertion succeeds when there is NO match).
+    attr_reader(:kind)
+    # Constructor.
+    # [assertedExpression]	A sub-expression to match.
+    # [theDir]	One of the following values: [ :ahead, :behind ]
+    # [theKind] One of the following values: [ :positive, :negative ]
+    def initialize(assertedExpression, theDir, theKind)
+      super([assertedExpression])
+      @dir, @kind = theDir, theKind
+    end
+  public
+    # Conversion method re-definition.
+    # Purpose: Return the String representation of the first child followed by the lookaround assertion on the second child.
+    def to_str()
+      result = children[0].to_str
+      dir_syntax = (dir == :ahead) ? '' : '<'
+      kind_syntax = (kind == :positive)? '=' : '!'
+      result << '(?' + dir_syntax + kind_syntax + children[1].to_str + ")"
+      return result
+    end
+  end # class
+end # module
+# End of file

data/examples/general/SRL/lib/regex/match_option.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# File: MatchOption.rb
+module Regex # This module is used as a namespace
+# Represents an option that influences the way a regular (sub)expression can perform its matching.
+class MatchOption
+	# The symbolic name of the option
+	attr_reader(:name)
+	# An indicator that tells whether the option is turned on or off
+	attr_reader(:setting)
+	# Constructor.
+	def initialize(theName, theSetting)
+		@name, @setting = theName, theSetting
+	end
+public
+	# Equality operator
+	def ==(another)
+		return true if self.object_id == another.object_id
+		if another.kind_of?(MatchOption)
+			isEqual = ((name == another.name) && (setting == another.setting))
+		else
+			isEqual = false
+		end
+		return isEqual
+	end
+end # class
+end # module
+# End of file

data/examples/general/SRL/lib/regex/non_capturing_group.rb CHANGED Viewed

@@ -14,10 +14,11 @@ module Regex # This module is used as a namespace
       super(aChildExpression)
     end
-  public
+    protected
     # Conversion method re-definition.
     # Purpose: Return the String representation of the non-capturing group.
-    def to_str()
+    def text_repr()
       result = '(?:' + all_child_text() + ")"
       return result
     end

data/examples/general/SRL/lib/regex/repetition.rb CHANGED Viewed

@@ -16,10 +16,11 @@ class Repetition < MonadicExpression
 		@multiplicity = aMultiplicity
 	end
-public
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the child expression followed by its multiplicity.
-	def to_str()
+	def text_repr()
 		result = all_child_text() + multiplicity.to_str()
 		return result
 	end

data/examples/general/SRL/lib/regex/wildcard.rb CHANGED Viewed

@@ -12,10 +12,11 @@ class Wildcard < AtomicExpression
 		super
 	end
-public
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the expression.
-	def to_str()
+	def text_repr()
 		return '.'
 	end

data/examples/general/SRL/lib/regex_repr.rb CHANGED Viewed

@@ -8,3 +8,6 @@ require_relative './regex/char_shorthand'
 require_relative './regex/wildcard'
 require_relative './regex/alternation'
 require_relative './regex/non_capturing_group'
+require_relative './regex/anchor'
+require_relative './regex/lookaround'
+require_relative './regex/capturing_group'

data/examples/general/SRL/lib/tokenizer.rb CHANGED Viewed

@@ -22,41 +22,60 @@ module SRL
       ')' => 'RPAREN',
       ',' => 'COMMA'
     }.freeze
     # Here are all the SRL keywords (in uppercase)
     @@keywords = %w[
+      ALL
+      ALREADY
       AND
       ANY
       ANYTHING
+      AS
       AT
       BACKSLASH
+      BEGIN
       BETWEEN
+      BY
+      CAPTURE
+      CASE
       CHARACTER
       DIGIT
+      END
       EXACTLY
+      FOLLOWED
       FROM
+      HAD
+      IF
+      INSENSITIVE
+      LAZY
       LEAST
       LETTER
       LINE
       LITERALLY
       MORE
+      MULTI
+      MUST
       NEVER
       NEW
       NO
+      NOT
       NUMBER
       OF
       ONCE
       ONE
       OPTIONAL
       OR
+      STARTS
       TAB
       TIMES
       TO
       TWICE
+      UNTIL
       UPPERCASE
       WHITESPACE
+      WITH
     ].map { |x| [x, x] } .to_h
     class ScanError < StandardError; end
     def initialize(source, aGrammar)
@@ -81,16 +100,16 @@ module SRL
       skip_whitespaces
       curr_ch = scanner.peek(1)
       return nil if curr_ch.nil? || curr_ch.empty?
       token = nil
       if '(),'.include? curr_ch
         # Delimiters, separators => single character token
-        token = build_token(@@lexeme2name[curr_ch], scanner.getch)
+        token = build_token(@@lexeme2name[curr_ch], scanner.getch)
       elsif (lexeme = scanner.scan(/[0-9]{2,}/))
         token = build_token('INTEGER', lexeme) # An integer has two or more digits
       elsif (lexeme = scanner.scan(/[0-9]/))
-        token = build_token('DIGIT_LIT', lexeme)
+        token = build_token('DIGIT_LIT', lexeme)
       elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
         token = build_token(@@keywords[lexeme.upcase], lexeme)
         # TODO: handle the case of an unknown identifier
@@ -111,7 +130,7 @@ module SRL
       return token
     end
     def build_token(aSymbolName, aLexeme)
       token_type = name2symbol[aSymbolName]
       begin
@@ -120,7 +139,7 @@ module SRL
         puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
         raise ex
       end
       return token
     end

data/examples/general/SRL/spec/integration_spec.rb CHANGED Viewed

@@ -151,7 +151,6 @@ describe 'Integration tests:' do
     end
   end # context
   context 'Parsing special character declarations:' do
     it "should parse 'tab' syntax" do
       result = parse('tab')
@@ -197,11 +196,16 @@ describe 'Integration tests:' do
       message_prefix = /Premature end of input after ','/
       expect(result.failure_reason.message).to match(message_prefix)
     end
+    it 'should parse concatenation' do
+      result = parse('any of (literally "sample", (digit once or more))')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
+    end
-    it "should parse a sequence of patterns" do
-      #
-      # DEBUG When I put a comma at the end ... looping endlessly
-      #
+    it "should parse a long sequence of patterns" do
       source = <<-ENDS
       any of (any character, one of "._%-+") once or more,
       literally "@",
@@ -289,6 +293,145 @@ ENDS
       expect(regexp.to_str).to eq('[p-t]{10,}')
     end
   end # context
+  context 'Parsing lookaround:' do
+    it 'should parse positive lookahead' do
+      result = parse('letter if followed by (anything once or more, digit)')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('[a-z](?=(?:.+\d))')
+    end
+    it 'should parse negative lookahead' do
+      result = parse('letter if not followed by (anything once or more, digit)')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('[a-z](?!(?:.+\d))')
+    end
+    it 'should parse positive lookbehind' do
+      result = parse('literally "bar" if already had literally "foo"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('bar(?<=foo)')
+    end
+    it 'should parse negative lookbehind' do
+      result = parse('literally "bar" if not already had literally "foo"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('bar(?<!foo)')
+    end
+  end # context
+  context 'Parsing capturing group:' do
+    it 'should parse simple anonymous capturing group' do
+      result = parse('capture(literally "sample")')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(sample)')
+    end
+    it 'should parse complex anonymous capturing group' do
+      result = parse('capture(any of (literally "sample", (digit once or more)))')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
+    end
+    it 'should parse simple anonymous until capturing group' do
+      result = parse('capture anything once or more until literally "!"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(.+)!')
+    end
+    it 'should parse complex named capturing group' do
+      result = parse('capture(any of (literally "sample", (digit once or more))) as "foo"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
+    end
+    it 'should parse a sequence with named capturing groups' do
+      source = <<-ENDS
+      capture (anything once or more) as "first",
+      literally " - ",
+      capture literally "second part" as "second"
+ENDS
+      result = parse(source)
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
+    end
+    it 'should parse complex named until capturing group' do
+      result = parse('capture (anything once or more) as "foo" until literally "m"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(?<foo>.+)m')
+    end
+  end # context
+  context 'Parsing anchors:' do
+    it 'should parse begin anchors' do
+      result = parse('starts with literally "match"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('^match')
+    end
+    it 'should parse begin anchors (alternative syntax)' do
+      result = parse('begin with literally "match"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('^match')
+    end
+    it 'should parse end anchors' do
+      result = parse('literally "match" must end')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('match$')
+    end
+    it 'should parse combination of begin and end anchors' do
+      result = parse('starts with literally "match" must end')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('^match$')
+    end
+    it "should accept anchor with a sequence of patterns" do
+      source = <<-ENDS
+      begin with any of (digit, letter, one of ".-") once or more,
+      literally ".",
+      letter at least 2 times must end
+ENDS
+      result = parse(source)
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      # SRL expect: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
+      expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
+    end
+  end # context
 end # describe

data/lib/rley/constants.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.5.11'.freeze
+  Version = '0.5.12'.freeze
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm".freeze

data/lib/rley/parser/parse_tree_builder.rb CHANGED Viewed

@@ -135,7 +135,7 @@ module Rley # This module is used as a namespace
               process_middle_entry(anEntry, anIndex)
             end
           else
-            $stderr.puts "Internal Errore '#{anEvent}'"
+            $stderr.puts "Internal Errore '#{anEvent}', entry: #{anEntry}, index: #{anIndex}"
             raise NotImplementedError
         end
       end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.5.11
+  version: 0.5.12
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-01-25 00:00:00.000000000 Z
+date: 2018-02-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: coveralls
@@ -149,7 +149,9 @@ files:
 - examples/general/SRL/lib/parser.rb
 - examples/general/SRL/lib/regex/abstract_method.rb
 - examples/general/SRL/lib/regex/alternation.rb
+- examples/general/SRL/lib/regex/anchor.rb
 - examples/general/SRL/lib/regex/atomic_expression.rb
+- examples/general/SRL/lib/regex/capturing_group.rb
 - examples/general/SRL/lib/regex/char_class.rb
 - examples/general/SRL/lib/regex/char_range.rb
 - examples/general/SRL/lib/regex/char_shorthand.rb
@@ -157,6 +159,8 @@ files:
 - examples/general/SRL/lib/regex/compound_expression.rb
 - examples/general/SRL/lib/regex/concatenation.rb
 - examples/general/SRL/lib/regex/expression.rb
+- examples/general/SRL/lib/regex/lookaround.rb
+- examples/general/SRL/lib/regex/match_option.rb
 - examples/general/SRL/lib/regex/monadic_expression.rb
 - examples/general/SRL/lib/regex/multiplicity.rb
 - examples/general/SRL/lib/regex/non_capturing_group.rb