RubyGems - rley - Versions diffs - 0.5.11 → 0.5.12 - Mend

rley 0.5.11 → 0.5.12

Files changed (24) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/examples/general/SRL/lib/ast_builder.rb +229 -38
data/examples/general/SRL/lib/grammar.rb +39 -7
data/examples/general/SRL/lib/regex/alternation.rb +3 -2
data/examples/general/SRL/lib/regex/anchor.rb +48 -0
data/examples/general/SRL/lib/regex/capturing_group.rb +50 -0
data/examples/general/SRL/lib/regex/char_class.rb +3 -2
data/examples/general/SRL/lib/regex/char_range.rb +4 -2
data/examples/general/SRL/lib/regex/char_shorthand.rb +3 -2
data/examples/general/SRL/lib/regex/character.rb +16 -14
data/examples/general/SRL/lib/regex/concatenation.rb +8 -7
data/examples/general/SRL/lib/regex/expression.rb +26 -5
data/examples/general/SRL/lib/regex/lookaround.rb +47 -0
data/examples/general/SRL/lib/regex/match_option.rb +36 -0
data/examples/general/SRL/lib/regex/non_capturing_group.rb +3 -2
data/examples/general/SRL/lib/regex/repetition.rb +3 -2
data/examples/general/SRL/lib/regex/wildcard.rb +3 -2
data/examples/general/SRL/lib/regex_repr.rb +3 -0
data/examples/general/SRL/lib/tokenizer.rb +26 -7
data/examples/general/SRL/spec/integration_spec.rb +148 -5
data/lib/rley/constants.rb +1 -1
data/lib/rley/parser/parse_tree_builder.rb +1 -1
metadata +6 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0ec06612f299302b861fbaeb04b75c0040a026cf
-  data.tar.gz: d68438efcbacceb2ae4319ac268492e93db35265
+  metadata.gz: 78c5a2a83d1691c6c470f2fb4bf347f7dca44cb6
+  data.tar.gz: 9c1f62cdf775e71e33ceecfc99db58298b4e3b82
 SHA512:
-  metadata.gz: ec3be765a424028c986ea4812cf6f1485f04285beb2b9d8fffc774fc0b61108d4d6758a09a648132562752ab25904fb38f8ee57ecff90d0a70bca253150ed130
-  data.tar.gz: 2463def65eecbefed2bbfffc61e63e88dca2d0498078e83bc742811e540718e95e75f3896fa31b5bdc9068f5420906f389615470a86831dbcb5025824645775d
+  metadata.gz: 662e52aa9aae198f6eb5e9ed1750a1bfa873969ad9bedea4ca7225babef3d9fa648b709c4bef4959432d62cf8e1733d556ce911efec9885a0d59e6d4972c0f50
+  data.tar.gz: ab114da248a85b5e78fdb2ba9affca357a52b0cbe45cbaa97b1ad41becf71b0fa89db6e1f4487c0a23c59b9b9b83db5101e511f2b711a0d4f9a324ac2c3f3a2d

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,12 @@
+### 0.5.12 / 2018-02-03
+* [CHANGE] Simple Regex Language is fully supported!...
+* [CHANGE] File `examples/general/SRL/lib/grammar.rb`: added the missing rule productions for Simple Regex Language.
+* [CHANGE] File `examples/general/SRL/lib/ast_builder.rb`: added transformation rules for the missing regular expression features.
+* [CHANGE] File `examples/general/SRL/spec/integration_spec.rb`: added tests covering SRL.
+* [NEW] File `examples/general/SRL/lib/regex/anchor.rb`: added support for anchors in regular expressions.
+* [NEW] File `examples/general/SRL/lib/regex/capturing_group.rb`: added support for capturing groups in regular expressions.
+* [NEW] File `examples/general/SRL/lib/regex/lookaround.rb`: added support for lookaround in regular expressions.
 ### 0.5.11 / 2018-01-25
 * [NEW] File `left.rb` added in `examples/general` folder for showing use of left-recursive rules.
 * [NEW] File `right.rb` added in `examples/general` folder for showing use of right-recursive rules (less performant).

data/examples/general/SRL/lib/ast_builder.rb CHANGED Viewed

@@ -14,6 +14,8 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
   Terminal2NodeClass = { }.freeze
+  attr_reader :options
   protected
   # Overriding method.
@@ -36,33 +38,109 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
   # @param theChildren [Array] Children nodes (one per rhs symbol)
   def new_parent_node(aProduction, aRange, theTokens, theChildren)
     node = case aProduction.name
-      when 'srl_0' # rule 'srl' => 'pattern'
+      when 'srl_0' # rule 'srl' => 'expression'
         return_first_child(aRange, theTokens, theChildren)
-      when 'pattern_0' # rule 'pattern' => %w[pattern COMMA quantifiable]
+      when 'expression_0' # rule 'expression' => %w[pattern separator flags]
+        reduce_expression_0(aProduction, aRange, theTokens, theChildren)
+      when 'expression_1' # rule 'expression' => 'pattern'
+        return_first_child(aRange, theTokens, theChildren)
+      when 'pattern_0' # rule 'pattern' => %w[pattern separator quantifiable]
         reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
-      when 'pattern_1' # rule 'pattern' => %w[pattern quantifiable]
-        reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
-      when 'pattern_2' # rule 'pattern' => 'quantifiable'
+      when 'pattern_1' # rule 'pattern' => 'quantifiable'
         return_first_child(aRange, theTokens, theChildren)
-      when 'quantifiable_0' # rule 'quantifiable' => 'term'
+      when 'separator_0' # rule 'separator' => 'COMMA'
         return_first_child(aRange, theTokens, theChildren)
-      when 'quantifiable_1' # rule 'quantifiable' = %w[term quantifier]
+      when 'separator_1' # rule 'separator' => []
+        nil
+      when 'flags_0' # rule 'flags' => %[flags separator single_flag]
+        ### NEW
+        reduce_flags_0(aProduction, aRange, theTokens, theChildren)
+      when 'single_flag_0' # rule 'single_flag' => %w[CASE INSENSITIVE]
+        ### NEW
+        reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
+      when 'single_flag_1' # rule 'single_flag' => %w[MULTI LINE]
+        ### NEW
+        reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
+      when 'single_flag_2' # rule 'single_flag' => %w[ALL LAZY]
+        ### NEW
+        reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
+      # rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
+      when 'quantifiable_0'
+        reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
+      # rule 'quantifiable' => %w[begin_anchor anchorable]
+      when 'quantifiable_1'
         reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
+      # rule 'quantifiable' => %w[anchorable end_anchor]
+      when 'quantifiable_2'
+        reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
+      when 'quantifiable_3' # rule 'quantifiable' => 'anchorable'
+        return_first_child(aRange, theTokens, theChildren)
+      # rule 'begin_anchor' => %w[STARTS WITH]
+      # rule 'begin_anchor' => %w[BEGIN WITH]
+      when 'begin_anchor_0', 'begin_anchor_1'
+        reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
+      when 'end_anchor_0' # rule 'end_anchor' => %w[MUST END]
+        reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
+      when 'anchorable_0' # rule 'anchorable' => 'assertable'
+        return_first_child(aRange, theTokens, theChildren)
+      when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
+        reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
+      when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
+        reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
+      # rule 'assertion' => %w[IF FOLLOWED BY assertable]
+      when 'assertion_0'
+        reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
+      # rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
+      when 'assertion_1'
+        reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
+      # rule 'assertion' => %w[IF ALREADY HAD assertable]
+      when 'assertion_2'
+        reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
+      # rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
+      when 'assertion_3'
+        reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
+      when 'assertable_0' # rule 'assertable' => 'term'
+        return_first_child(aRange, theTokens, theChildren)
+      when 'assertable_1' # rule 'assertable' => %w[term quantifier]
+        reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
       when 'term_0' # rule 'term' => 'atom'
         return_first_child(aRange, theTokens, theChildren)
       when 'term_1' # rule 'term' => 'alternation'
         return_first_child(aRange, theTokens, theChildren)
       when 'term_2' # rule 'term' => 'grouping'
         return_first_child(aRange, theTokens, theChildren)
+      when 'term_3' # rule 'term' => 'capturing_group'
+        return_first_child(aRange, theTokens, theChildren)
       when 'atom_0' # rule 'atom' => 'letter_range'
         return_first_child(aRange, theTokens, theChildren)
@@ -133,19 +211,34 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
       when 'alternation_0'
         reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
-      # rule 'alternatives' => %w[alternatives COMMA quantifiable]
+      # rule 'alternatives' => %w[alternatives separator quantifiable]
       when 'alternatives_0'
         reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
-      # rule 'alternatives' => %w[alternatives quantifiable]
-      when 'alternatives_1'
+      when 'alternatives_1' # rule 'alternatives' => 'quantifiable'
         reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
-      when 'alternatives_2' # rule 'alternatives' => 'quantifiable'
-        reduce_alternatives_2(aProduction, aRange, theTokens, theChildren)
+      when 'grouping_0' # rule 'grouping' => %w[LPAREN pattern RPAREN]
+        reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
+      # rule 'capturing_group' => %w[CAPTURE assertable]
+      when 'capturing_group_0'
+        reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
+      # rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
+      when 'capturing_group_1'
+        reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
+      # rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
+      when 'capturing_group_2'
+        reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
+      # rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
+      when 'capturing_group_3'
+        reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
-      when 'grouping' # rule 'grouping' => %w[LPAREN pattern RPAREN]
-        reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
+      when 'var_name_0' # rule 'var_name' => 'STRING_LIT'
+        return_first_child(aRange, theTokens, theChildren)
       when 'quantifier_0' # rule 'quantifier' => 'ONCE'
         multiplicity(1, 1)
@@ -205,11 +298,11 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
       result = Regex::Concatenation.new(*chars)
     else
         if to_escape && Regex::Character::MetaChars.include?(aString)
-          result = Regex::Concatenation.new(Regex::Character.new("\\"),
+          result = Regex::Concatenation.new(Regex::Character.new("\\"),
             Regex::Character.new(aString))
         else
           result = Regex::Character.new(aString)
-        end
+        end
     end
     return result
@@ -237,21 +330,100 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
   def repetition(expressionToRepeat, aMultiplicity)
     return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
   end
-  # rule 'pattern' => %w[pattern COMMA quantifiable]
+  # rule 'expression' => %w[pattern separator flags]
+  def reduce_expression_0(aProduction, aRange, theTokens, theChildren)
+    @options = theChildren[2] if theChildren[2]
+    return_first_child(aRange, theTokens, theChildren)
+  end
+  # rule 'pattern' => %w[pattern separator quantifiable]
   def reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
     return Regex::Concatenation.new(theChildren[0], theChildren[2])
   end
-  # rule 'pattern' => %w[pattern quantifiable]
-  def reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
-    return Regex::Concatenation.new(theChildren[0], theChildren[1])
+  # rule 'flags' => %[flags separator single_flag]
+  def reduce_flags_0(aProduction, aRange, theTokens, theChildren)
+    theChildren[0] << theChildren[2]
   end
-  # rule 'quantifiable' => %w[term quantifier]
+  # rule 'single_flag' => %w[CASE INSENSITIVE]
+  def reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
+    return [ Regex::MatchOption.new(:IGNORECASE, true) ]
+  end
+  # rule 'single_flag' => %w[MULTI LINE]
+  def reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
+    return [ Regex::MatchOption.new(:MULTILINE, true) ]
+  end
+  # rule 'single_flag' => %w[ALL LAZY]
+  def reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
+    return [ Regex::MatchOption.new(:ALL_LAZY, true) ]
+  end
+  # rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
+  def reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
+    theChildren[1].begin_anchor = theChildren[0]
+    theChildren[1].end_anchor = theChildren[2]
+    return theChildren[1]
+  end
+  # rule 'quantifiable' => %w[begin_anchor anchorable]
   def reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
-    quantifier = theChildren.last
-    term = theChildren.first
+    theChildren[1].begin_anchor = theChildren[0]
+    return theChildren[1]
+  end
+  # rule 'quantifiable' => %w[anchorable end_anchor]
+  def reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
+    theChildren[0].end_anchor = theChildren[1]
+    return theChildren[0]
+  end
+  # rule 'begin_anchor' => %w[STARTS WITH]
+  # rule 'begin_anchor' => %w[BEGIN WITH]
+  def reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
+    return Regex::Anchor.new('^')
+  end
+  # rule 'end_anchor' => %w[MUST END]
+  def reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
+    return Regex::Anchor.new('$')
+  end
+  # rule 'anchorable' => %w[assertable assertion]
+  def reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
+    assertion = theChildren.last
+    assertion.children.unshift(theChildren[0])
+    return assertion
+  end
+  # rule 'assertion' => %w[IF FOLLOWED BY assertable]
+  def reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
+    return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
+  end
+  # rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
+  def reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
+    return Regex::Lookaround.new(theChildren.last, :ahead, :negative)
+  end
+  # rule 'assertion' => %w[IF ALREADY HAD assertable]
+  def reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
+    return Regex::Lookaround.new(theChildren.last, :behind, :positive)
+  end
+  # rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
+  def reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
+    return Regex::Lookaround.new(theChildren.last, :behind, :negative)
+  end
+  # rule 'assertable' => %w[term quantifier]
+  def reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
+    quantifier = theChildren[1]
+    term = theChildren[0]
     repetition(term, quantifier)
   end
@@ -348,37 +520,56 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
     raw_literal = theChildren[-1].token.lexeme.dup
     return string_literal(raw_literal)
   end
   # rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
   def reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
     return Regex::Alternation.new(*theChildren[3])
   end
-  # rule 'alternatives' => %w[alternatives COMMA quantifiable]
+  # rule 'alternatives' => %w[alternatives separator quantifiable]
   def reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
     return theChildren[0] << theChildren[-1]
   end
-  # rule 'alternatives' => %w[alternatives quantifiable]
-  def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
-    return theChildren[0] << theChildren[-1]
-  end
   # rule 'alternatives' => 'quantifiable'
-  def reduce_alternatives_2(aProduction, aRange, theTokens, theChildren)
+  def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
     return [theChildren.last]
   end
   # rule 'grouping' => %w[LPAREN pattern RPAREN]
   def reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
-    return Regex::NonCapturingGroup.new(theChildren[1])
+    return Regex::NonCapturingGroup.new(theChildren[1])
   end
+  # rule 'capturing_group' => %w[CAPTURE assertable]
+  def reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
+    return Regex::CapturingGroup.new(theChildren[1])
+  end
+  # rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
+  def reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
+    group = Regex::CapturingGroup.new(theChildren[1])
+    return Regex::Concatenation.new(group, theChildren[3])
+  end
+  # rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
+  def reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
+    name = theChildren[3].token.lexeme.dup
+    return Regex::CapturingGroup.new(theChildren[1], name)
+  end
+  # rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
+  def reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
+    name = theChildren[3].token.lexeme.dup
+    group = Regex::CapturingGroup.new(theChildren[1], name)
+    return Regex::Concatenation.new(group, theChildren[5])
+  end
   # rule 'quantifier' => %w[EXACTLY count TIMES]
   def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
     count = theChildren[1].token.lexeme.to_i
     multiplicity(count, count)
-  end
+  end
   # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
   def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)

data/examples/general/SRL/lib/grammar.rb CHANGED Viewed

@@ -9,6 +9,8 @@ module SRL
     add_terminals('LPAREN', 'RPAREN', 'COMMA')
     add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
     add_terminals('LITERALLY', 'STRING_LIT')
+    add_terminals('BEGIN', 'STARTS', 'WITH')
+    add_terminals('MUST', 'END')
     add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
     add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
     add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
@@ -17,16 +19,42 @@ module SRL
     add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
     add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
     add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
+    add_terminals('IF', 'FOLLOWED', 'BY', 'NOT')
+    add_terminals('ALREADY', 'HAD')
+    add_terminals('CAPTURE', 'AS', 'UNTIL')
+    add_terminals('CASE', 'INSENSITIVE', 'MULTI', 'ALL')
+    add_terminals('LAZY')
-    rule 'srl' => 'pattern'
-    rule 'pattern' => %w[pattern COMMA quantifiable]
-    rule 'pattern' => %w[pattern quantifiable]
+    rule 'srl' => 'expression'
+    rule 'expression' => %w[pattern separator flags]
+    rule 'expression' => 'pattern'
+    rule 'pattern' => %w[pattern separator quantifiable]
     rule 'pattern' => 'quantifiable'
-    rule 'quantifiable' => 'term'
-    rule 'quantifiable' => %w[term quantifier]
+    rule 'separator' => 'COMMA'
+    rule 'separator' => []
+    rule 'flags' => %[flags separator single_flag]
+    rule 'single_flag' => %w[CASE INSENSITIVE]
+    rule 'single_flag' => %w[MULTI LINE]
+    rule 'single_flag' => %w[ALL LAZY]
+    rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
+    rule 'quantifiable' => %w[begin_anchor anchorable]
+    rule 'quantifiable' => %w[anchorable end_anchor]
+    rule 'quantifiable' => 'anchorable'
+    rule 'begin_anchor' => %w[STARTS WITH]
+    rule 'begin_anchor' => %w[BEGIN WITH]
+    rule 'end_anchor' => %w[MUST END]
+    rule 'anchorable' => 'assertable'
+    rule 'anchorable' => %w[assertable assertion]
+    rule 'assertion' => %w[IF FOLLOWED BY assertable]
+    rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
+    rule 'assertion' => %w[IF ALREADY HAD assertable]
+    rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
+    rule 'assertable' => 'term'
+    rule 'assertable' => %w[term quantifier]
     rule 'term' => 'atom'
     rule 'term' => 'alternation'
     rule 'term' => 'grouping'
+    rule 'term' => 'capturing_group'
     rule 'atom' => 'letter_range'
     rule 'atom' => 'digit_range'
     rule 'atom' => 'character_class'
@@ -49,10 +77,14 @@ module SRL
     rule 'special_char' => %w[NEW LINE]
     rule 'literal' => %w[LITERALLY STRING_LIT]
     rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
-    rule 'alternatives' => %w[alternatives COMMA quantifiable]
-    rule 'alternatives' => %w[alternatives quantifiable]
+    rule 'alternatives' => %w[alternatives separator quantifiable]
     rule 'alternatives' => 'quantifiable'
     rule 'grouping' => %w[LPAREN pattern RPAREN]
+    rule 'capturing_group' => %w[CAPTURE assertable]
+    rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
+    rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
+    rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
+    rule 'var_name' => 'STRING_LIT'
     rule 'quantifier' => 'ONCE'
     rule 'quantifier' => 'TWICE'
     rule 'quantifier' => %w[EXACTLY count TIMES]

data/examples/general/SRL/lib/regex/alternation.rb CHANGED Viewed

@@ -13,10 +13,11 @@ class Alternation < PolyadicExpression
 		super(theChildren)
 	end
-public
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the concatenated expressions.
-	def to_str()
+	def text_repr()
 		result_children = children.map { |aChild| aChild.to_str() }
 		result =  '(?:' + result_children.join('|') + ')'

data/examples/general/SRL/lib/regex/anchor.rb ADDED Viewed

@@ -0,0 +1,48 @@
+# File: anchor.rb
+require_relative "atomic_expression"	# Access the superclass
+module Regex # This module is used as a namespace
+  # An anchor is a zero-width assertion based on the current position.
+  class Anchor < AtomicExpression
+    # A Hash for converting a lexeme to a symbolic value
+    AnchorToSymbol = {
+      # Lexeme => Symbol value
+      '^' => :soLine,	# Start of line
+      '$' => :eoLine,	# End of line
+      '\A' => :soSubject,
+      '\b' => :wordBoundary,
+      '\B' => :nonAtWordBoundary,
+      '\G' => :firstMatch,
+      '\z' => :eoSubject,
+      '\Z' => :eoSubjectOrBeforeNLAtEnd
+    }
+    # A symbolic value that identifies the type of assertion to perform
+    attr_reader(:kind)
+    # Constructor
+    # @param aKind [String] Lexeme representation of the anchor
+    def initialize(aKind)
+      @kind = valid_kind(aKind)
+    end
+    public
+    # Conversion method re-definition.
+    # Purpose: Return the String representation of the expression.
+    def to_str()
+      return AnchorToSymbol.rassoc(kind).first()
+    end
+    private
+    # Return the symbolic value corresponding to the given lexeme.
+    def valid_kind(aKind)
+      return AnchorToSymbol[aKind]
+    end
+  end # class
+end # module
+# End of file

data/examples/general/SRL/lib/regex/capturing_group.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# File: capturing_group.rb
+require_relative "monadic_expression"	# Access the superclass
+module Regex # This module is used as a namespace
+  # An association between a capture variable and a sub-expression:
+  # when the sub-expression matches, the matched text is assigned to the variable.
+  class CapturingGroup < MonadicExpression
+    # The capture variable id. It is a Fixnum when the capture group gets a sequence number,
+    # a String when it is a user-defined name
+    attr_reader(:id)
+    # When true, then capturing group forbids backtracking requests from its parent expression.
+    attr_reader(:no_backtrack)
+    # Constructor.
+    # [aChildExpression]	A sub-expression to match. When successful the matching text is assigned to the capture variable.
+    # [theId] The id of the capture variable.
+    # [noBacktrack] A flag that specifies whether the capturing group forbids backtracking requests from its parent expression.
+    def initialize(aChildExpression, theId = nil, noBacktrack = false)
+      super(aChildExpression)
+      @id = theId
+      @no_backtrack = noBacktrack
+    end
+  public
+    # Return true iff the capturing group has a name (and not a sequence number)
+    def named?()
+      return id.kind_of?(String)
+    end
+    # Conversion method re-definition.
+    # Purpose: Return the String representation of the captured expression.
+    def to_str()
+      prefix = named? ? "?<#{id}>" : ''
+      atomic = no_backtrack ? '?>' : ''
+      if child.is_a?(Regex::NonCapturingGroup)
+        # Minor optimization
+        result = '(' + atomic + prefix + child.child.to_str + ")"
+      else
+        result = '(' + atomic + prefix + child.to_str + ")"
+      end
+      return result
+    end
+  end # class
+end # module
+# End of file

data/examples/general/SRL/lib/regex/char_class.rb CHANGED Viewed

@@ -18,10 +18,11 @@ module Regex # This module is used as a namespace
       @negated = to_negate
     end
-  public
+    protected
     # Conversion method re-definition.
     # Purpose: Return the String representation of the character class.
-    def to_str()
+    def text_repr()
       result_children = children.inject('') do |subResult, aChild|
         if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
           subResult << "\\" # Escape meta-character...

data/examples/general/SRL/lib/regex/char_range.rb CHANGED Viewed

@@ -26,11 +26,13 @@ public
 	# Return the upper bound of the range.
 	def upper()
 		return children.last
-	end
+	end
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the concatenated expressions.
-	def to_str()
+	def text_repr()
 		result = lower.to_str() + '-' + upper.to_str()
 		return result

data/examples/general/SRL/lib/regex/char_shorthand.rb CHANGED Viewed

@@ -28,10 +28,11 @@ module Regex # This module is used as a namespace
       @shortname = valid_shortname(aShortname)
     end
-  public
+    protected
     # Conversion method re-definition.
     # Purpose: Return the String representation of the expression.
-    def to_str()
+    def text_repr()
       return "\\#{shortname}"
     end

data/examples/general/SRL/lib/regex/character.rb CHANGED Viewed

@@ -106,20 +106,6 @@ public
 		self.class.codepoint2char(@codepoint)
 	end
-	# Conversion method re-definition.
-	# Purpose: Return the String representation of the expression.
-	# If the Character was initially from a text (the lexeme), then the lexeme is returned back.
-	# Otherwise the character corresponding to the codepoint is returned.
-	def to_str()
-		if lexeme.nil?
-			result = char()
-		else
-			result = lexeme.dup()
-		end
-		return result
-	end
 	# Returns true iff this Character and parameter 'another' represent the same character.
 	# [another] any Object. The way the equality is tested depends on the another's class
 	# Example:
@@ -152,6 +138,22 @@ public
 	def explain()
 		return "the character '#{to_str()}'"
 	end
+  protected
+  # Conversion method re-definition.
+	# Purpose: Return the String representation of the expression.
+	# If the Character was initially from a text (the lexeme), then the lexeme is returned back.
+	# Otherwise the character corresponding to the codepoint is returned.
+	def text_repr()
+		if lexeme.nil?
+			result = char()
+		else
+			result = lexeme.dup()
+		end
+		return result
+	end
 private
 	# Conversion method that returns the codepoint for the given two-character (digram) escape sequence.

data/examples/general/SRL/lib/regex/concatenation.rb CHANGED Viewed

@@ -4,24 +4,25 @@ require_relative 'polyadic_expression'	# Access the superclass
 module Regex # This module is used as a namespace
-# Abstract class. A n-ary matching operator.
+# Abstract class. A n-ary matching operator.
 # It succeeds when each child succeeds to match the subject text in the same
 # serial arrangement than defined by this concatenation.
 class Concatenation < PolyadicExpression
 	# Constructor.
 	def initialize(*theChildren)
 		super(theChildren)
 	end
-public
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the concatenated expressions.
-	def to_str()
-		result = children.inject('') { |result, aChild|
+	def text_repr()
+		result = children.inject('') { |result, aChild|
 			result << aChild.to_str()
 		}
 		return result
 	end

data/examples/general/SRL/lib/regex/expression.rb CHANGED Viewed

@@ -5,7 +5,11 @@ require_relative 'abstract_method'
 module Regex # This module is used as a namespace
 # Abstract class. The generalization of any valid regular (sub)expression.
-class Expression
+class Expression
+  attr_accessor :begin_anchor
+  attr_accessor :end_anchor
+  # Constructor
 	def initialize()
 	end
@@ -20,20 +24,37 @@ public
 	def cardinality(theParentOptions) abstract_method
 	end
-protected
 	# Determine the matching options to apply to this object, given the options coming from the parent
 	# and options that are local to this object. Local options take precedence.
-	# [theParentOptions] a Hash of matching options. They are overridden by options with same name
+	# @param theParentOptions [Hash] matching options. They are overridden by options with same name
 	# that are bound to this object.
 	def options(theParentOptions)
 		resulting_options = theParentOptions.merge(@local_options)
 		return resulting_options
 	end
-	# Abstract conversion method.
+	# Template method.
 	# Purpose: Return the String representation of the expression.
-	def to_str() abstract_method
+	def to_str()
+    result = ''
+    result << prefix
+    result << text_repr
+    result << suffix
+    return result
 	end
+  protected
+  def prefix()
+    begin_anchor ? begin_anchor.to_str : ''
+  end
+  def suffix()
+    end_anchor ? end_anchor.to_str : ''
+  end
 end # class

data/examples/general/SRL/lib/regex/lookaround.rb ADDED Viewed

@@ -0,0 +1,47 @@
+# File: lookaround.rb
+########################
+# TODO: make it a binary expression
+########################
+require_relative 'polyadic_expression'	# Access the superclass
+module Regex # This module is used as a namespace
+  # Lookaround is a zero-width assertion just like the start and end of line anchors.
+  # The difference is that lookarounds will actually match characters, but only return the result of the match: match or no match.
+  # That is why they are called "assertions". They do not consume characters from the subject,
+  # but only assert whether a match is possible or not.
+  class Lookaround < PolyadicExpression
+    # The "direction" of the lookaround. Can be ahead or behind. It specifies the relative position of the
+    # expression to match compared to the current 'position' in the subject text.
+    attr_reader(:dir)
+    # The kind indicates whether the assertion is positive (succeeds when there is a match) or negative
+    # (assertion succeeds when there is NO match).
+    attr_reader(:kind)
+    # Constructor.
+    # [assertedExpression]	A sub-expression to match.
+    # [theDir]	One of the following values: [ :ahead, :behind ]
+    # [theKind] One of the following values: [ :positive, :negative ]
+    def initialize(assertedExpression, theDir, theKind)
+      super([assertedExpression])
+      @dir, @kind = theDir, theKind
+    end
+  public
+    # Conversion method re-definition.
+    # Purpose: Return the String representation of the first child followed by the lookaround group.
+    def to_str()
+      result = children[0].to_str
+      dir_syntax = (dir == :ahead) ? '' : '<'
+      kind_syntax = (kind == :positive)? '=' : '!'
+      result << '(?' + dir_syntax + kind_syntax + children[1].to_str + ")"
+      return result
+    end
+  end # class
+end # module
+# End of file

data/examples/general/SRL/lib/regex/match_option.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# File: MatchOption.rb
+module Regex # This module is used as a namespace
+# Represents an option that influences the way a regular (sub)expression can perform its matching.
+class MatchOption
+	# The symbolic name of the option
+	attr_reader(:name)
+	# An indicator that tells whether the option is turned on or off
+	attr_reader(:setting)
+	# Constructor. Stores theName as the option's symbolic name and theSetting as its on/off indicator.
+	def initialize(theName, theSetting)
+		@name, @setting = theName, theSetting
+	end
+public
+	# Equality operator: true for the very same object, or for another MatchOption with equal name and setting.
+	def ==(another)
+		return true if self.object_id == another.object_id
+		if another.kind_of?(MatchOption)
+			isEqual = ((name == another.name) && (setting == another.setting))
+		else
+			isEqual = false
+		end
+		return isEqual
+	end
+end # class
+end # module
+# End of file

data/examples/general/SRL/lib/regex/non_capturing_group.rb CHANGED Viewed

@@ -14,10 +14,11 @@ module Regex # This module is used as a namespace
       super(aChildExpression)
     end
-  public
+    protected
     # Conversion method re-definition.
     # Purpose: Return the String representation of the non-capturing group.
-    def to_str()
+    def text_repr()
       result = '(?:' + all_child_text() + ")"
       return result
     end

data/examples/general/SRL/lib/regex/repetition.rb CHANGED Viewed

@@ -16,10 +16,11 @@ class Repetition < MonadicExpression
 		@multiplicity = aMultiplicity
 	end
-public
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the repeated expression followed by its multiplicity.
-	def to_str()
+	def text_repr()
 		result = all_child_text() + multiplicity.to_str()
 		return result
 	end

data/examples/general/SRL/lib/regex/wildcard.rb CHANGED Viewed

@@ -12,10 +12,11 @@ class Wildcard < AtomicExpression
 		super
 	end
-public
+  protected
 	# Conversion method re-definition.
 	# Purpose: Return the String representation of the expression.
-	def to_str()
+	def text_repr()
 		return '.'
 	end

data/examples/general/SRL/lib/regex_repr.rb CHANGED Viewed

@@ -8,3 +8,6 @@ require_relative './regex/char_shorthand'
 require_relative './regex/wildcard'
 require_relative './regex/alternation'
 require_relative './regex/non_capturing_group'
+require_relative './regex/anchor'
+require_relative './regex/lookaround'
+require_relative './regex/capturing_group'

data/examples/general/SRL/lib/tokenizer.rb CHANGED Viewed

@@ -22,41 +22,60 @@ module SRL
       ')' => 'RPAREN',
       ',' => 'COMMA'
     }.freeze
     # Here are all the SRL keywords (in uppercase)
     @@keywords = %w[
+      ALL
+      ALREADY
       AND
       ANY
       ANYTHING
+      AS
       AT
       BACKSLASH
+      BEGIN
       BETWEEN
+      BY
+      CAPTURE
+      CASE
       CHARACTER
       DIGIT
+      END
       EXACTLY
+      FOLLOWED
       FROM
+      HAD
+      IF
+      INSENSITIVE
+      LAZY
       LEAST
       LETTER
       LINE
       LITERALLY
       MORE
+      MULTI
+      MUST
       NEVER
       NEW
       NO
+      NOT
       NUMBER
       OF
       ONCE
       ONE
       OPTIONAL
       OR
+      STARTS
       TAB
       TIMES
       TO
       TWICE
+      UNTIL
       UPPERCASE
       WHITESPACE
+      WITH
     ].map { |x| [x, x] } .to_h
     class ScanError < StandardError; end
     def initialize(source, aGrammar)
@@ -81,16 +100,16 @@ module SRL
       skip_whitespaces
       curr_ch = scanner.peek(1)
       return nil if curr_ch.nil? || curr_ch.empty?
       token = nil
       if '(),'.include? curr_ch
         # Delimiters, separators => single character token
-        token = build_token(@@lexeme2name[curr_ch], scanner.getch)
+        token = build_token(@@lexeme2name[curr_ch], scanner.getch)
       elsif (lexeme = scanner.scan(/[0-9]{2,}/))
         token = build_token('INTEGER', lexeme) # An integer has two or more digits
       elsif (lexeme = scanner.scan(/[0-9]/))
-        token = build_token('DIGIT_LIT', lexeme)
+        token = build_token('DIGIT_LIT', lexeme)
       elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
         token = build_token(@@keywords[lexeme.upcase], lexeme)
         # TODO: handle case unknown identifier
@@ -111,7 +130,7 @@ module SRL
       return token
     end
     def build_token(aSymbolName, aLexeme)
       token_type = name2symbol[aSymbolName]
       begin
@@ -120,7 +139,7 @@ module SRL
         puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
         raise ex
       end
       return token
     end

data/examples/general/SRL/spec/integration_spec.rb CHANGED Viewed

@@ -151,7 +151,6 @@ describe 'Integration tests:' do
     end
   end # context
   context 'Parsing special character declarations:' do
     it "should parse 'tab' syntax" do
       result = parse('tab')
@@ -197,11 +196,16 @@ describe 'Integration tests:' do
       message_prefix = /Premature end of input after ','/
       expect(result.failure_reason.message).to match(message_prefix)
     end
+    it 'should parse concatenation' do
+      result = parse('any of (literally "sample", (digit once or more))')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
+    end
-    it "should parse a sequence of patterns" do
-      #
-      # DEBUG When I put a comma at the end ... looping endlessly
-      #
+    it "should parse a long sequence of patterns" do
       source = <<-ENDS
       any of (any character, one of "._%-+") once or more,
       literally "@",
@@ -289,6 +293,145 @@ ENDS
       expect(regexp.to_str).to eq('[p-t]{10,}')
     end
   end # context
+  context 'Parsing lookaround:' do
+    it 'should parse positive lookahead' do
+      result = parse('letter if followed by (anything once or more, digit)')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('[a-z](?=(?:.+\d))')
+    end
+    it 'should parse negative lookahead' do
+      result = parse('letter if not followed by (anything once or more, digit)')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('[a-z](?!(?:.+\d))')
+    end
+    it 'should parse positive lookbehind' do
+      result = parse('literally "bar" if already had literally "foo"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('bar(?<=foo)')
+    end
+    it 'should parse negative lookbehind' do
+      result = parse('literally "bar" if not already had literally "foo"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('bar(?<!foo)')
+    end
+  end # context
+  context 'Parsing capturing group:' do
+    it 'should parse simple anonymous capturing group' do
+      result = parse('capture(literally "sample")')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(sample)')
+    end
+    it 'should parse complex anonymous capturing group' do
+      result = parse('capture(any of (literally "sample", (digit once or more)))')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
+    end
+    it 'should parse simple anonymous until capturing group' do
+      result = parse('capture anything once or more until literally "!"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(.+)!')
+    end
+    it 'should parse complex named capturing group' do
+      result = parse('capture(any of (literally "sample", (digit once or more))) as "foo"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
+    end
+    it 'should parse a sequence with named capturing groups' do
+      source = <<-ENDS
+      capture (anything once or more) as "first",
+      literally " - ",
+      capture literally "second part" as "second"
+ENDS
+      result = parse(source)
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
+    end
+    it 'should parse complex named until capturing group' do
+      result = parse('capture (anything once or more) as "foo" until literally "m"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('(?<foo>.+)m')
+    end
+  end # context
+  context 'Parsing anchors:' do
+    it 'should parse begin anchors' do
+      result = parse('starts with literally "match"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('^match')
+    end
+    it 'should parse begin anchors (alternative syntax)' do
+      result = parse('begin with literally "match"')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('^match')
+    end
+    it 'should parse end anchors' do
+      result = parse('literally "match" must end')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('match$')
+    end
+    it 'should parse combination of begin and end anchors' do
+      result = parse('starts with literally "match" must end')
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      expect(regexp.to_str).to eq('^match$')
+    end
+    it "should accept anchor with a sequence of patterns" do
+      source = <<-ENDS
+      begin with any of (digit, letter, one of ".-") once or more,
+      literally ".",
+      letter at least 2 times must end
+ENDS
+      result = parse(source)
+      expect(result).to be_success
+      regexp = regexp_repr(result)
+      # SRL expect: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
+      expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
+    end
+  end # context
 end # describe

data/lib/rley/constants.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.5.11'.freeze
+  Version = '0.5.12'.freeze
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm".freeze

data/lib/rley/parser/parse_tree_builder.rb CHANGED Viewed

@@ -135,7 +135,7 @@ module Rley # This module is used as a namespace
               process_middle_entry(anEntry, anIndex)
             end
           else
-            $stderr.puts "Internal Errore '#{anEvent}'"
+            $stderr.puts "Internal Errore '#{anEvent}', entry: #{anEntry}, index: #{anIndex}"
             raise NotImplementedError
         end
       end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.5.11
+  version: 0.5.12
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-01-25 00:00:00.000000000 Z
+date: 2018-02-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: coveralls
@@ -149,7 +149,9 @@ files:
 - examples/general/SRL/lib/parser.rb
 - examples/general/SRL/lib/regex/abstract_method.rb
 - examples/general/SRL/lib/regex/alternation.rb
+- examples/general/SRL/lib/regex/anchor.rb
 - examples/general/SRL/lib/regex/atomic_expression.rb
+- examples/general/SRL/lib/regex/capturing_group.rb
 - examples/general/SRL/lib/regex/char_class.rb
 - examples/general/SRL/lib/regex/char_range.rb
 - examples/general/SRL/lib/regex/char_shorthand.rb
@@ -157,6 +159,8 @@ files:
 - examples/general/SRL/lib/regex/compound_expression.rb
 - examples/general/SRL/lib/regex/concatenation.rb
 - examples/general/SRL/lib/regex/expression.rb
+- examples/general/SRL/lib/regex/lookaround.rb
+- examples/general/SRL/lib/regex/match_option.rb
 - examples/general/SRL/lib/regex/monadic_expression.rb
 - examples/general/SRL/lib/regex/multiplicity.rb
 - examples/general/SRL/lib/regex/non_capturing_group.rb