RubyGems - srl_ruby - Versions diffs - 0.2.5 → 0.2.6 - Mend

srl_ruby 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +13 -4
data/lib/regex/raw_expression.rb +23 -0
data/lib/srl_ruby/ast_builder.rb +20 -5
data/lib/srl_ruby/grammar.rb +22 -7
data/lib/srl_ruby/regex_repr.rb +2 -1
data/lib/srl_ruby/tokenizer.rb +13 -8
data/lib/srl_ruby/version.rb +1 -1
data/spec/acceptance/srl_test_suite_spec.rb +5 -0
data/spec/srl_ruby_spec.rb +46 -0
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 13bb293008059e97968eab3b976e05f7fe9c353e
-  data.tar.gz: 933e9531ae294fe4adb46e3c7ef87c58a3243388
+  metadata.gz: 475a892be14d441ff82714324f489707839eae57
+  data.tar.gz: 52ad8fffe6fb8abe17e99e690dc182bb0b3a48b0
 SHA512:
-  metadata.gz: 1c585bfad3f6330b87f47ab7c1dae115bc953a50220bc3d87642524c523cfbe1ae19193cfc0173730d456ec6691611abd59964ea4722a42bdbb1571f6c06ce9c
-  data.tar.gz: f835d81332961e020addee4ec8b5e4c46378f29ad12a7497fb03f5545eee1c884b1da5b8a621d4f76713b348b3b324fc96c4eb9f60b98037c1c7e9b98ab5d9c6
+  metadata.gz: 24b028b5becb5f2ac57c6fbf1aaa5eb45d7b3e3bde1e8ea2892b56f7d67fc643bf8d28cff0e431e7a3f97c996b0c44321bfd52eb17d818daeb51a334cb2d056b
+  data.tar.gz: b21b2fefc35eac997bd06a0a4a7bc586d80eba2074cfb365c7a22a4e6390e48a45960df3132b09abc2e53569c5c6ad4f925510191808212ba6a8b991dfc259a8

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,14 @@
+## [0.2.6] - 2018-04-03
+SrlRuby passes 13 tests out of 15 from standard SRL test suite.
+### Changed
+- Class `SrlRuby#Tokenizer` added CHAR_CLASS literal and keywords EITHER, NONE, RAW.
+- Grammar expanded to support 'NONE OF', 'EITHER OF', 'RAW' expressions and unquoted character classes
+- Class `SrlRuby::ASTBuilder` updates to reflect changes in the grammar.
+- File `acceptance/srl_test_suite_spec.rb`. 13 test files from official test suite are passing.
 ## [0.2.5] - 2018-04-02
-SrlRuby passes 12 tests out of 15 standard SRL tests in total.
+SrlRuby passes 12 tests out of 15 from standard SRL test suite.
 ### Changed
 - Class `SrlRuby#Tokenizer` added keywords CARRIAGE, RETURN, VERTICAL, WORD
 - Grammar expanded to support 'CARRIAGE RETURN', 'VERTICAL TAB', 'WORD', 'NO WORD' expressions
@@ -8,7 +17,7 @@ SrlRuby passes 12 tests out of 15 standard SRL tests in total.
 ## [0.2.4] - 2018-04-02
-SrlRuby passes 10 tests out of 15 standard SRL tests in total.
+SrlRuby passes 10 tests out of 15 from standard SRL test suite.
 ### Changed
 - File `lib/srl_ruby/grammar.rb` grammar refactoring. Added support for new 'no digit' SRL expression.
 - Class `SrlRuby::ASTBuilder` updates to reflect changes in the grammar.
@@ -27,7 +36,7 @@ SrlRuby passes 10 tests out of 15 standard SRL tests in total.
 - Fixed rley version dependency
 ## [0.2.1] - 2018-03-15
-SrlRuby passes 7 tests out of 15 standard SRL tests in total.
+SrlRuby passes 7 tests out of 15 from standard SRL test suite.
 ### Changed
 - File `acceptance/srl_test_suite_spec.rb`. More examples in spec file.
 - File `ast_builder.rb` updates to reflect grammar changes.
@@ -39,7 +48,7 @@ SrlRuby passes 7 tests out of 15 standard SRL tests in total.
 - File `ast_builder.rb` fixed anchor implementation.
 ## [0.2.0] - 2018-03-14
-SrlRuby passes 3 standard out of 15 standard SRL tests in total.
+SrlRuby passes 3 tests out of 15 from standard SRL test suite.
 ### Added
 - Added `spec/acceptance/support` directory. It contains test harness to use the .rule files from standard SRL test suite.
 - Added `acceptance/srl_test_suite_spec.rb` file. Spec file designed to run the standard SRL test suite. At this date, SrlRuby passes 3 tests out of 15 tests in total.

data/lib/regex/raw_expression.rb ADDED Viewed

@@ -0,0 +1,23 @@
+require_relative 'atomic_expression' # Access the superclass
+module Regex # This module is used as a namespace
+  # A raw expression is a string that will be copied verbatim (as is)
+  # in the generated regular expression.
+  class RawExpression < AtomicExpression
+	attr_reader :raw
+    # Constructor
+    def initialize(rawLiteral)
+      super()
+      @raw = rawLiteral
+    end
+    protected
+    # Conversion method re-definition.
+    # Purpose: Return the String representation of the expression.
+	alias_method :text_repr, :raw
+  end # class
+end # module
+# End of file

data/lib/srl_ruby/ast_builder.rb CHANGED Viewed

@@ -272,15 +272,24 @@ module SrlRuby
       return Regex::CharClass.new(false, *alternatives)
     end
+    # rule('character_class' => %w[NONE OF cclass]).as 'none_of'
+    def reduce_none_of(_production, _range, _tokens, theChildren)
+      raw_literal = theChildren[-1].token.lexeme.dup
+      chars = raw_literal.chars.map do |ch|
+        Regex::Character.new(ch)
+      end
+      Regex::CharClass.new(true, *chars)
+    end
     # rule('special_char' => 'TAB').as 'tab'
     def reduce_tab(_production, _range, _tokens, _children)
       Regex::Character.new('\t')
     end
     # rule('special_char' => ' VERTICAL TAB').as 'vtab'
     def reduce_vtab(_production, _range, _tokens, _children)
       Regex::Character.new('\v')
-    end
+    end
     # rule('special_char' => 'BACKSLASH').as 'backslash'
     def reduce_backslash(_production, _range, _tokens, _children)
@@ -293,7 +302,7 @@ module SrlRuby
       # TODO: control portability
       Regex::Character.new('\n')
     end
     # rule('special_char' => %w[CARRIAGE RETURN]).as 'carriage_return'
     def reduce_carriage_return(_production, _range, _tokens, _children)
       Regex::Character.new('\r')
@@ -303,11 +312,11 @@ module SrlRuby
     def reduce_word(_production, _range, _tokens, _children)
       Regex::Anchor.new('\b')
     end
     # rule('special_char' => %w[NO WORD]).as 'no_word'
     def reduce_no_word(_production, _range, _tokens, _children)
       Regex::Anchor.new('\B')
-    end
+    end
     # rule('literal' => %w[LITERALLY STRING_LIT]).as 'literally'
     def reduce_literally(_production, _range, _tokens, theChildren)
@@ -317,6 +326,12 @@ module SrlRuby
       return string_literal(raw_literal)
     end
+    # rule('raw' => %w[RAW STRING_LIT]).as 'raw_literal'
+    def reduce_raw_literal(_production, _range, _tokens, theChildren)
+      raw_literal = theChildren[-1].token.lexeme.dup
+      return Regex::RawExpression.new(raw_literal)
+    end
     # rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
     def reduce_any_of(_production, _range, _tokens, theChildren)
       first_alternative = theChildren[3].first

data/lib/srl_ruby/grammar.rb CHANGED Viewed

@@ -4,17 +4,22 @@ module SrlRuby
   ########################################
   # SRL grammar
   builder = Rley::Syntax::GrammarBuilder.new do
+    # Separators...
     add_terminals('LPAREN', 'RPAREN', 'COMMA')
-    add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
+    # Literal values...
+    add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT', 'CHAR_CLASS')
     add_terminals('LITERALLY', 'STRING_LIT', 'IDENTIFIER')
+    # Keywords...
     add_terminals('BEGIN', 'STARTS', 'WITH')
-    add_terminals('MUST', 'END')
+    add_terminals('MUST', 'END', 'RAW')
     add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
-    add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
+    add_terminals('DIGIT', 'NUMBER', 'ANY', 'EITHER', 'NO')
     add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
     add_terminals('TAB', 'BACKSLASH', 'NEW', 'LINE', 'WORD')
-    add_terminals('CARRIAGE', 'RETURN', 'VERTICAL', 'OF', 'ONE')
-    add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
+    add_terminals('CARRIAGE', 'RETURN', 'VERTICAL', 'OF', 'NONE')
+    add_terminals('ONE', 'EXACTLY', 'TIMES', 'ONCE', 'TWICE')
     add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
     add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
     add_terminals('IF', 'FOLLOWED', 'BY', 'NOT')
@@ -23,6 +28,7 @@ module SrlRuby
     add_terminals('CASE', 'INSENSITIVE', 'MULTI', 'ALL')
     add_terminals('LAZY')
+    # Grammar rules...
     rule('srl' => 'expression').as 'start_rule'
     rule('expression' => %w[pattern flags]).as 'flagged_expr'
     rule('expression' => 'pattern').as 'simple_expr'
@@ -60,6 +66,7 @@ module SrlRuby
     rule('atom' => 'character_class').as 'character_class_atom'
     rule('atom' => 'special_char').as 'special_char_atom'
     rule('atom' => 'literal').as 'literal_atom'
+    rule('atom' => 'raw').as 'raw_atom'
     rule('letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'lowercase_from_to'
     rule('letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'uppercase_from_to'
     rule('letter_range' => 'LETTER').as 'any_lowercase'
@@ -72,7 +79,12 @@ module SrlRuby
     rule('character_class' => 'WHITESPACE').as 'whitespace'
     rule('character_class' => %w[NO WHITESPACE]).as 'no_whitespace'
     rule('character_class' => 'ANYTHING').as 'anything'
-    rule('character_class' => %w[ONE OF STRING_LIT]).as 'one_of'
+    rule('character_class' => %w[ONE OF cclass]).as 'one_of'
+    rule('character_class' => %w[NONE OF cclass]).as 'none_of'
+    rule('cclass' => 'STRING_LIT').as 'quoted_cclass' # Preferred syntax
+    rule('cclass' => 'INTEGER').as 'digits_cclass'
+    rule('cclass' => 'IDENTIFIER').as 'identifier_cclass'
+    rule('cclass' => 'CHAR_CLASS').as 'unquoted_cclass'
     rule('special_char' => 'TAB').as 'tab'
     rule('special_char' => 'VERTICAL TAB').as 'vtab'
     rule('special_char' => 'BACKSLASH').as 'backslash'
@@ -81,9 +93,12 @@ module SrlRuby
     rule('special_char' => %w[WORD]).as 'word'
     rule('special_char' => %w[NO WORD]).as 'no_word'
     rule('literal' => %w[LITERALLY STRING_LIT]).as 'literally'
-    rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
+    rule('raw' => 'RAW STRING_LIT').as 'raw_literal'
+    rule('alternation' => %w[any_or_either OF LPAREN alternatives RPAREN]).as 'any_of'
     rule('alternatives' => %w[alternatives separator quantifiable]).as 'alternative_list'
     rule('alternatives' => 'quantifiable').as 'simple_alternative'
+    rule('any_or_either' => 'ANY').as 'any_keyword'
+    rule('any_or_either' => 'EITHER').as 'either_keyword'
     rule('grouping' => %w[LPAREN pattern RPAREN]).as 'grouping_parenthenses'
     rule('capturing_group' => %w[CAPTURE assertable]).as 'capture'
     rule('capturing_group' => %w[CAPTURE assertable UNTIL assertable]).as 'capture_until'

data/lib/srl_ruby/regex_repr.rb CHANGED Viewed

@@ -11,4 +11,5 @@ require_relative '../regex/non_capturing_group'
 require_relative '../regex/anchor'
 require_relative '../regex/lookaround'
 require_relative '../regex/capturing_group'
-require_relative '../regex/match_option'
+require_relative '../regex/match_option'
+require_relative '../regex/raw_expression'

data/lib/srl_ruby/tokenizer.rb CHANGED Viewed

@@ -42,6 +42,7 @@ module SrlRuby
       CASE
       CHARACTER
       DIGIT
+      EITHER
       END
       EXACTLY
       FOLLOWED
@@ -60,6 +61,7 @@ module SrlRuby
       NEVER
       NEW
       NO
+      NONE
       NOT
       NUMBER
       OF
@@ -67,6 +69,7 @@ module SrlRuby
       ONE
       OPTIONAL
       OR
+      RAW
       RETURN
       STARTS
       TAB
@@ -111,22 +114,24 @@ module SrlRuby
       if '(),'.include? curr_ch
         # Delimiters, separators => single character token
         token = build_token(@@lexeme2name[curr_ch], scanner.getch)
-      elsif (lexeme = scanner.scan(/[0-9]{2,}/))
+      elsif (lexeme = scanner.scan(/[0-9]{2,}((?=\s)|$)/))
         token = build_token('INTEGER', lexeme) # An integer has 2..* digits
-      elsif (lexeme = scanner.scan(/[0-9]/))
+      elsif (lexeme = scanner.scan(/[0-9]((?=\s)|$)/))
         token = build_token('DIGIT_LIT', lexeme)
-      elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]+/))
-        keyw = @@keywords[lexeme.upcase]
-        tok_type = keyw ? keyw : 'IDENTIFIER'
-        token = build_token(tok_type, lexeme)
-      elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
-        token = build_token('LETTER_LIT', lexeme)
       elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
         unquoted = lexeme.gsub(/(^")|("$)/, '')
         token = build_token('STRING_LIT', unquoted)
       elsif (lexeme = scanner.scan(/'(?:\\'|[^'])*'/)) # Single quotes literal?
         unquoted = lexeme.gsub(/(^')|('$)/, '')
         token = build_token('STRING_LIT', unquoted)
+      elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
+        token = build_token('LETTER_LIT', lexeme)
+      elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]+/))
+        keyw = @@keywords[lexeme.upcase]
+        tok_type = keyw ? keyw : 'IDENTIFIER'
+        token = build_token(tok_type, lexeme)
+      elsif (lexeme = scanner.scan(/[^,"\s]{2,}/))
+        token = build_token('CHAR_CLASS', lexeme)
       else # Unknown token
         erroneous = curr_ch.nil? ? '' : curr_ch
         sequel = scanner.scan(/.{1,20}/)

data/lib/srl_ruby/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module SrlRuby
-  VERSION = '0.2.5'.freeze
+  VERSION = '0.2.6'.freeze
 end

data/spec/acceptance/srl_test_suite_spec.rb CHANGED Viewed

@@ -71,6 +71,11 @@ RSpec.describe Acceptance do
     rule_file_repr = load_file('nondigit.rule')
     test_rule_file(rule_file_repr)
   end
+  it 'should support negative character class' do
+    rule_file_repr = load_file('none_of.rule')
+    test_rule_file(rule_file_repr)
+  end
   it 'should match a tab' do
     rule_file_repr = load_file('tab.rule')

data/spec/srl_ruby_spec.rb CHANGED Viewed

@@ -44,6 +44,11 @@ describe SrlRuby do
       regexp = SrlRuby.parse("literally '.'")
       expect(regexp.source).to eq('\.')
     end
+    it 'should parse single quotes literal string' do
+      regexp = SrlRuby.parse('literally "an", whitespace, raw "[a-zA-Z]"')
+      expect(regexp.source).to eq('an\s[a-zA-Z]')
+    end
   end # context
   context 'Parsing character classes:' do
@@ -93,6 +98,41 @@ describe SrlRuby do
       # (escapes more characters than required)
       expect(regexp.source).to eq('[._%+\-]')
     end
+    it "should parse 'one of' with unquoted character class syntax" do
+      # Case of digit sequence
+      regexp = SrlRuby.parse('one of 13579, must end')
+      expect(regexp.source).to eq('[13579]$')
+      # Case of identifier-like character class
+      regexp = SrlRuby.parse('one of abcd, must end')
+      expect(regexp.source).to eq('[abcd]$')
+      # Case of arbitrary character class
+      regexp = SrlRuby.parse('one of 12hms:, must end')
+      expect(regexp.source).to eq('[12hms:]$')
+    end
+    it "should parse 'none of' syntax" do
+      regexp = SrlRuby.parse('none of "._%+-"')
+      # Remark: reference implementation less readable
+      # (escapes more characters than required)
+      expect(regexp.source).to eq('[^._%+\-]')
+    end
+    it "should parse 'none of' with unquoted character class syntax" do
+      # Case of digit sequence
+      regexp = SrlRuby.parse('none of 13579, must end')
+      expect(regexp.source).to eq('[^13579]$')
+      # Case of identifier-like character class
+      regexp = SrlRuby.parse('none of abcd, must end')
+      expect(regexp.source).to eq('[^abcd]$')
+      # Case of arbitrary character class
+      regexp = SrlRuby.parse('none of 12hms:^, must end')
+      expect(regexp.source).to eq('[^12hms:\^]$')
+    end
   end # context
   context 'Parsing special character declarations:' do
@@ -137,6 +177,12 @@ describe SrlRuby do
       source = 'any of (any character, one of "._%-+")'
       regexp = SrlRuby.parse(source)
       expect(regexp.source).to eq('(?:\w|[._%\-+])')
+    end
+    it "should parse 'either of' syntax" do
+      source = 'either of (any character, one of "._%-+")'
+      regexp = SrlRuby.parse(source)
+      expect(regexp.source).to eq('(?:\w|[._%\-+])')
     end
     it 'should anchor as alternative' do

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: srl_ruby
 version: !ruby/object:Gem::Version
-  version: 0.2.5
+  version: 0.2.6
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-04-02 00:00:00.000000000 Z
+date: 2018-04-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rley
@@ -104,6 +104,7 @@ files:
 - lib/regex/non_capturing_group.rb
 - lib/regex/polyadic_expression.rb
 - lib/regex/quantifiable.rb
+- lib/regex/raw_expression.rb
 - lib/regex/repetition.rb
 - lib/regex/wildcard.rb
 - lib/srl_ruby.rb