RubyGems - srl_ruby - Versions diffs - 0.0.1 - Mend

srl_ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

checksums.yaml +7 -0
data/.rspec +4 -0
data/.rubocop.yml +3 -0
data/.yardopts +6 -0
data/Gemfile +6 -0
data/LICENSE.txt +21 -0
data/README.md +66 -0
data/Rakefile +16 -0
data/bin/srl_ruby +58 -0
data/lib/regex/abstract_method.rb +35 -0
data/lib/regex/alternation.rb +27 -0
data/lib/regex/anchor.rb +45 -0
data/lib/regex/atomic_expression.rb +16 -0
data/lib/regex/capturing_group.rb +51 -0
data/lib/regex/char_class.rb +38 -0
data/lib/regex/char_range.rb +51 -0
data/lib/regex/char_shorthand.rb +50 -0
data/lib/regex/character.rb +204 -0
data/lib/regex/compound_expression.rb +57 -0
data/lib/regex/concatenation.rb +29 -0
data/lib/regex/expression.rb +60 -0
data/lib/regex/lookaround.rb +50 -0
data/lib/regex/match_option.rb +34 -0
data/lib/regex/monadic_expression.rb +28 -0
data/lib/regex/multiplicity.rb +91 -0
data/lib/regex/non_capturing_group.rb +27 -0
data/lib/regex/polyadic_expression.rb +60 -0
data/lib/regex/quantifiable.rb +22 -0
data/lib/regex/repetition.rb +29 -0
data/lib/regex/wildcard.rb +23 -0
data/lib/srl_ruby/ast_builder.rb +384 -0
data/lib/srl_ruby/grammar.rb +106 -0
data/lib/srl_ruby/regex_repr.rb +13 -0
data/lib/srl_ruby/tokenizer.rb +147 -0
data/lib/srl_ruby/version.rb +3 -0
data/lib/srl_ruby.rb +4 -0
data/spec/integration_spec.rb +451 -0
data/spec/regex/character_spec.rb +166 -0
data/spec/regex/multiplicity_spec.rb +79 -0
data/spec/spec_helper.rb +16 -0
data/spec/srl_ruby/srl_ruby_spec.rb +7 -0
data/spec/srl_ruby/tokenizer_spec.rb +147 -0
data/srl_ruby.gemspec +58 -0
metadata +150 -0

data/spec/integration_spec.rb ADDED Viewed

@@ -0,0 +1,451 @@
+require_relative 'spec_helper' # Use the RSpec framework
+require_relative '../lib/srl_ruby/tokenizer'
+require_relative '../lib/srl_ruby/grammar'
+require_relative '../lib/srl_ruby/ast_builder'
+module SrlRuby
+  describe 'Integration tests:' do
+    def parse(someSRL)
+      tokenizer = SrlRuby::Tokenizer.new(someSRL)
+      @engine.parse(tokenizer.tokens)
+    end
+    def regexp_repr(aResult)
+      # Generate an abstract syntax parse tree from the parse result
+      tree = @engine.convert(aResult)
+      tree.root
+    end
+    before(:each) do
+      @engine = Rley::Engine.new do |config|
+        config.repr_builder = ASTBuilder
+      end
+      @engine.use_grammar(SrlRuby::Grammar)
+    end
+    context 'Parsing character ranges:' do
+      it "should parse 'letter from ... to ...' syntax" do
+        result = parse('letter from a to f')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[a-f]')
+      end
+      it "should parse 'uppercase letter from ... to ...' syntax" do
+        result = parse('UPPERCASE letter from A to F')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[A-F]')
+      end
+      it "should parse 'letter' syntax" do
+        result = parse('letter')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[a-z]')
+      end
+      it "should parse 'uppercase letter' syntax" do
+        result = parse('uppercase letter')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[A-Z]')
+      end
+      it "should parse 'digit from ... to ...' syntax" do
+        result = parse('digit from 1 to 4')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[1-4]')
+      end
+    end # context
+    context 'Parsing string literals:' do
+      it 'should parse double quotes literal string' do
+        result = parse('literally "hello"')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('hello')
+      end
+      it 'should parse single quotes literal string' do
+        result = parse("literally 'hello'")
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('hello')
+      end
+      it 'should escape special characters' do
+        result = parse("literally '.'")
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\.')
+      end
+    end
+    context 'Parsing character classes:' do
+      it "should parse 'digit' syntax" do
+        result = parse('digit')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\d')
+      end
+      it "should parse 'number' syntax" do
+        result = parse('number')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\d')
+      end
+      it "should parse 'any character' syntax" do
+        result = parse('any character')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\w')
+      end
+      it "should parse 'no character' syntax" do
+        result = parse('no character')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\W')
+      end
+      it "should parse 'whitespace' syntax" do
+        result = parse('whitespace')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\s')
+      end
+      it "should parse 'no whitespace' syntax" do
+        result = parse('no whitespace')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\S')
+      end
+      it "should parse 'anything' syntax" do
+        result = parse('anything')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('.')
+      end
+      it "should parse 'one of' syntax" do
+        result = parse('one of "._%+-"')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        # Remark: reference implementation less readable
+        # (escapes more characters than required)
+        expect(regexp.to_str).to eq('[._%+\-]')
+      end
+    end # context
+    context 'Parsing special character declarations:' do
+      it "should parse 'tab' syntax" do
+        result = parse('tab')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\t')
+      end
+      it "should parse 'backslash' syntax" do
+        result = parse('backslash')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\\')
+      end
+      it "should parse 'new line' syntax" do
+        result = parse('new line')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\n')
+      end
+    end # context
+    context 'Parsing alternations:' do
+      it "should parse 'any of' syntax" do
+        source = 'any of (any character, one of "._%-+")'
+        result = parse(source)
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('(?:\w|[._%\-+])')
+      end
+    end # context
+    context 'Parsing concatenation:' do
+      it 'should reject dangling comma' do
+        source = 'literally "a",'
+        result = parse(source)
+        expect(result).not_to be_success
+        message_prefix = /Premature end of input after ','/
+        expect(result.failure_reason.message).to match(message_prefix)
+      end
+      it 'should parse concatenation' do
+        result = parse('any of (literally "sample", (digit once or more))')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
+      end
+      it 'should parse a long sequence of patterns' do
+        source = <<-ENDS
+        any of (any character, one of "._%-+") once or more,
+        literally "@",
+        any of (digit, letter, one of ".-") once or more,
+        literally ".",
+        letter at least 2 times
+  ENDS
+        result = parse(source)
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        # SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
+        expectation = '(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}'
+        expect(regexp.to_str).to eq(expectation)
+      end
+    end # context
+    context 'Parsing quantifiers:' do
+      let(:prefix) { 'letter from p to t ' }
+      it "should parse 'once' syntax" do
+        result = parse(prefix + 'once')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[p-t]{1}')
+      end
+      it "should parse 'twice' syntax" do
+        result = parse('digit twice')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('\d{2}')
+      end
+      it "should parse 'optional' syntax" do
+        result = parse('anything optional')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('.?')
+      end
+      it "should parse 'exactly ... times' syntax" do
+        result = parse('letter from a to f exactly 4 times')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[a-f]{4}')
+      end
+      it "should parse 'between ... and ... times' syntax" do
+        result = parse(prefix + 'between 2 and 4 times')
+        expect(result).to be_success
+        # Dropping 'times' keyword is shorter syntax
+        expect(parse(prefix + 'between 2 and 4')).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[p-t]{2,4}')
+      end
+      it "should parse 'once or more' syntax" do
+        result = parse(prefix + 'once or more')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[p-t]+')
+      end
+      it "should parse 'never or more' syntax" do
+        result = parse(prefix + 'never or more')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[p-t]*')
+      end
+      it "should parse 'at least  ... times' syntax" do
+        result = parse(prefix + 'at least 10 times')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[p-t]{10,}')
+      end
+    end # context
+    context 'Parsing lookaround:' do
+      it 'should parse positive lookahead' do
+        result = parse('letter if followed by (anything once or more, digit)')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[a-z](?=(?:.+\d))')
+      end
+      it 'should parse negative lookahead' do
+        result = parse('letter if not followed by (anything once or more, digit)')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('[a-z](?!(?:.+\d))')
+      end
+      it 'should parse positive lookbehind' do
+        result = parse('literally "bar" if already had literally "foo"')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('bar(?<=foo)')
+      end
+      it 'should parse negative lookbehind' do
+        result = parse('literally "bar" if not already had literally "foo"')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('bar(?<!foo)')
+      end
+    end # context
+    context 'Parsing capturing group:' do
+      it 'should parse simple anonymous capturing group' do
+        result = parse('capture(literally "sample")')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('(sample)')
+      end
+      it 'should parse complex anonymous capturing group' do
+        source = 'capture(any of (literally "sample", (digit once or more)))'
+        result = parse(source)
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
+      end
+      it 'should parse simple anonymous until capturing group' do
+        result = parse('capture anything once or more until literally "!"')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('(.+)!')
+      end
+      it 'should parse complex named capturing group' do
+        source = <<-END_SRL
+  capture(any of (literally "sample", (digit once or more)))
+    as "foo"
+  END_SRL
+        result = parse(source)
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
+      end
+      it 'should parse a sequence with named capturing groups' do
+        source = <<-ENDS
+        capture (anything once or more) as "first",
+        literally " - ",
+        capture literally "second part" as "second"
+  ENDS
+        result = parse(source)
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
+      end
+      it 'should parse complex named until capturing group' do
+        source = 'capture (anything once or more) as "foo" until literally "m"'
+        result = parse(source)
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('(?<foo>.+)m')
+      end
+    end # context
+    context 'Parsing anchors:' do
+      it 'should parse begin anchors' do
+        result = parse('starts with literally "match"')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('^match')
+      end
+      it 'should parse begin anchors (alternative syntax)' do
+        result = parse('begin with literally "match"')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('^match')
+      end
+      it 'should parse end anchors' do
+        result = parse('literally "match" must end')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('match$')
+      end
+      it 'should parse combination of begin and end anchors' do
+        result = parse('starts with literally "match" must end')
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        expect(regexp.to_str).to eq('^match$')
+      end
+      it 'should accept anchor with a sequence of patterns' do
+        source = <<-ENDS
+        begin with any of (digit, letter, one of ".-") once or more,
+        literally ".",
+        letter at least 2 times must end
+  ENDS
+        result = parse(source)
+        expect(result).to be_success
+        regexp = regexp_repr(result)
+        # SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
+        expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
+      end
+    end # context
+  end # describe
+end # module
+# End of file

data/spec/regex/character_spec.rb ADDED Viewed

@@ -0,0 +1,166 @@
+# File: character_spec.rb
+require_relative '../spec_helper' # Use the RSpec test framework
+require_relative '../../lib/regex/character'
+module Regex # Open this namespace, to get rid of scope qualifiers
+  describe Character do
+    # An arbitrary selection of single characters
+    SampleChars = [?a, ?\0, ?\u0107].freeze
+    # The codepoints of the characters above, listed in the same order
+    SampleInts = [0x61, 0, 0x0107].freeze
+    # An arbitrary selection of two-character escape sequences (digrams).
+    # %w does not interpret escapes: each entry is a backslash plus one char
+    SampleDigrams = %w[\n \e \0 \6 \k].freeze
+    # An arbitrary selection of escaped numeric literals
+    # (octal, hexadecimal and \u forms), kept as uninterpreted text
+    SampleNumEscs = %w[\0 \07 \x07 \xa \x0F \u03a3 \u{a}].freeze
+    before(:all) do
+      # Ensure that the set of codepoints is mapping the set of chars...
+      expect(SampleChars.map(&:ord)).to eq(SampleInts)
+    end
+    context 'Creation & initialization' do
+      it 'should be created with a with an integer value (codepoint) or...' do
+        SampleInts.each do |aCodepoint|
+          expect { Character.new(aCodepoint) }.not_to raise_error
+        end
+      end
+      it '...could be created with a single character String or...' do
+        SampleChars.each do |aChar|
+          expect { Character.new(aChar) }.not_to raise_error
+        end
+      end
+      it '...could be created with an escape sequence' do
+        # Case 1: escape sequence is a digram
+        SampleDigrams.each do |anEscapeSeq|
+          expect { Character.new(anEscapeSeq) }.not_to raise_error
+        end
+        # Case 2: escape sequence is an escaped octal or hexadecimal literal
+        SampleNumEscs.each do |anEscapeSeq|
+          expect { Character.new(anEscapeSeq) }.not_to raise_error
+        end
+      end
+    end # context
+    context 'Provided services' do
+      it 'Should know its lexeme if created from a string' do
+        # Lexeme is defined when the character was initialised from a text
+        SampleChars.each do |aChar|
+          ch = Character.new(aChar)
+          expect(ch.lexeme).to eq(aChar)
+        end
+      end
+      it 'Should not know its lexeme representation from a codepoint' do
+        SampleInts.each do |aChar|
+          ch = Character.new(aChar)
+          expect(ch.lexeme).to be_nil
+        end
+      end
+      it 'should know its String representation' do
+        # Try for one character
+        newOne = Character.new(?\u03a3)
+        expect(newOne.char).to eq('Σ')
+        expect(newOne.to_str).to eq("\u03A3")
+        # Try with our chars sample
+        SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
+        # Try with our codepoint sample
+        mapped_chars = SampleInts.map do |aCodepoint|
+          Character.new(aCodepoint).char
+        end
+        expect(mapped_chars).to eq(SampleChars)
+        # Try with our escape sequence samples
+        (SampleDigrams + SampleNumEscs).each do |anEscSeq|
+          expectation = String.class_eval(%Q|"#{anEscSeq}"|, __FILE__, __LINE__)
+          Character.new(anEscSeq).to_str == expectation
+        end
+      end
+      it 'should know its codepoint' do
+        # Try for one character
+        newOne = Character.new(?\u03a3)
+        expect(newOne.codepoint).to eq(0x03a3)
+        # Try with our chars sample
+        allCodepoints = SampleChars.map do |aChar|
+          Character.new(aChar).codepoint
+        end
+        expect(allCodepoints).to eq(SampleInts)
+        # Try with our codepoint sample
+        mapped_chars = SampleInts.each do |aCodepoint|
+          expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint)
+        end
+        # Try with our escape sequence samples
+        (SampleDigrams + SampleNumEscs).each do |anEscSeq|
+          expectation = String.class_eval(%Q|"#{anEscSeq}".ord()|, __FILE__, __LINE__)
+          expect(Character.new(anEscSeq).codepoint).to eq(expectation)
+        end
+      end
+      # Checks Character equality against Characters, integers, strings
+      # and arbitrary objects
+      it 'should known whether it is equal to another Object' do
+        newOne = Character.new(?\u03a3)
+        # Case 1: test equality with itself
+        expect(newOne).to eq(newOne)
+        # Case 2: test equality with another Character
+        expect(newOne).to eq(Character.new(?\u03a3))
+        expect(newOne).not_to eq(Character.new(?\u0333))
+        # Case 3: test equality with an integer value
+        # (equality based on codepoint value)
+        expect(newOne).to eq(0x03a3)
+        expect(newOne).not_to eq(0x0333)
+        # Case 4: test equality with a single-character String
+        expect(newOne).to eq(?\u03a3)
+        expect(newOne).not_to eq(?\u0333)
+        # Case 5: test fails with multiple character strings
+        expect(newOne).not_to eq('03a3')
+        # Case 6: equality testing with arbitrary object
+        expect(newOne).not_to eq(nil)
+        expect(newOne).not_to eq(Object.new)
+        # In case 6, equality is based on to_s method.
+        # First with a test double whose to_s must be called
+        simulator = double('fake')
+        expect(simulator).to receive(:to_s).and_return(?\u03a3)
+        expect(newOne).to eq(simulator)
+        # Create a module that re-defines the existing to_s method
+        module Tweak_to_s
+          def to_s() # Overwrite the existing to_s method
+            return ?\u03a3
+          end
+        end # module
+        # Then with a plain object extended with that module
+        weird = Object.new
+        weird.extend(Tweak_to_s)
+        expect(newOne).to eq(weird)
+      end
+      it 'should know its readable description' do
+        ch1 = Character.new('a')
+        expect(ch1.explain).to eq("the character 'a'")
+        ch2 = Character.new(?\u03a3)
+        expect(ch2.explain).to eq("the character '\u03a3'")
+      end
+    end # context
+  end # describe
+end # module
+# End of file

data/spec/regex/multiplicity_spec.rb ADDED Viewed

@@ -0,0 +1,79 @@
+# File: multiplicity_spec.rb
+require_relative '../spec_helper' # Use the RSpec test framework
+require_relative '../../lib/regex/multiplicity'
+module SRL
+  # Reopen the module, in order to get rid of fully qualified names
+  module Regex # This module is used as a namespace
+    describe Multiplicity do
+      context 'Creation & initialisation' do
+        it 'should be created with 3 arguments' do
+          # Valid cases: initialized with two integer values and a policy symbol
+          %i[greedy lazy possessive].each do |aPolicy|
+            expect { Multiplicity.new(0, 1, aPolicy) }.not_to raise_error
+          end
+          # Invalid case: initialized with invalid policy value
+          err = StandardError
+          msg = "Invalid repetition policy 'KO'."
+          expect { Multiplicity.new(0, :more, 'KO') }.to raise_error(err, msg)
+        end
+      end
+      context 'Provided services' do
+        it 'should know its text representation' do
+          policy2text = { greedy: '', lazy: '?', possessive: '+' }
+          # Case: zero or one
+          policy2text.each_key do |aPolicy|
+            multi = Multiplicity.new(0, 1, aPolicy)
+            expect(multi.to_str).to eq("?#{policy2text[aPolicy]}")
+          end
+          # Case: zero or more
+          policy2text.each_key do |aPolicy|
+            multi = Multiplicity.new(0, :more, aPolicy)
+            expect(multi.to_str).to eq("*#{policy2text[aPolicy]}")
+          end
+          # Case: one or more
+          policy2text.each_key do |aPolicy|
+            multi = Multiplicity.new(1, :more, aPolicy)
+            expect(multi.to_str).to eq("+#{policy2text[aPolicy]}")
+          end
+          # Case: exactly m times
+          policy2text.each_key do |aPolicy|
+            samples = [1, 2, 5, 100]
+            samples.each do |aCount|
+              multi = Multiplicity.new(aCount, aCount, aPolicy)
+              expect(multi.to_str).to eq("{#{aCount}}#{policy2text[aPolicy]}")
+            end
+          end
+          # Case: m, n times
+          policy2text.each_key do |aPolicy|
+            samples = [1, 2, 5, 100]
+            samples.each do |aCount|
+              upper = aCount + 1 + rand(20)
+              multi = Multiplicity.new(aCount, upper, aPolicy)
+              expectation = "{#{aCount},#{upper}}#{policy2text[aPolicy]}"
+              expect(multi.to_str).to eq(expectation)
+            end
+          end
+          # Case: m or more
+          policy2text.each_key do |aPolicy|
+            samples = [2, 3, 5, 100]
+            samples.each do |aCount|
+              multi = Multiplicity.new(aCount, :more, aPolicy)
+              expect(multi.to_str).to eq("{#{aCount},}#{policy2text[aPolicy]}")
+            end
+          end
+        end
+      end
+    end
+  end # module
+end # module
+# End of file