srl_ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +4 -0
  3. data/.rubocop.yml +3 -0
  4. data/.yardopts +6 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +66 -0
  8. data/Rakefile +16 -0
  9. data/bin/srl_ruby +58 -0
  10. data/lib/regex/abstract_method.rb +35 -0
  11. data/lib/regex/alternation.rb +27 -0
  12. data/lib/regex/anchor.rb +45 -0
  13. data/lib/regex/atomic_expression.rb +16 -0
  14. data/lib/regex/capturing_group.rb +51 -0
  15. data/lib/regex/char_class.rb +38 -0
  16. data/lib/regex/char_range.rb +51 -0
  17. data/lib/regex/char_shorthand.rb +50 -0
  18. data/lib/regex/character.rb +204 -0
  19. data/lib/regex/compound_expression.rb +57 -0
  20. data/lib/regex/concatenation.rb +29 -0
  21. data/lib/regex/expression.rb +60 -0
  22. data/lib/regex/lookaround.rb +50 -0
  23. data/lib/regex/match_option.rb +34 -0
  24. data/lib/regex/monadic_expression.rb +28 -0
  25. data/lib/regex/multiplicity.rb +91 -0
  26. data/lib/regex/non_capturing_group.rb +27 -0
  27. data/lib/regex/polyadic_expression.rb +60 -0
  28. data/lib/regex/quantifiable.rb +22 -0
  29. data/lib/regex/repetition.rb +29 -0
  30. data/lib/regex/wildcard.rb +23 -0
  31. data/lib/srl_ruby/ast_builder.rb +384 -0
  32. data/lib/srl_ruby/grammar.rb +106 -0
  33. data/lib/srl_ruby/regex_repr.rb +13 -0
  34. data/lib/srl_ruby/tokenizer.rb +147 -0
  35. data/lib/srl_ruby/version.rb +3 -0
  36. data/lib/srl_ruby.rb +4 -0
  37. data/spec/integration_spec.rb +451 -0
  38. data/spec/regex/character_spec.rb +166 -0
  39. data/spec/regex/multiplicity_spec.rb +79 -0
  40. data/spec/spec_helper.rb +16 -0
  41. data/spec/srl_ruby/srl_ruby_spec.rb +7 -0
  42. data/spec/srl_ruby/tokenizer_spec.rb +147 -0
  43. data/srl_ruby.gemspec +58 -0
  44. metadata +150 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a80b384a696db072da2e0b05df48bbb318ca817d
4
+ data.tar.gz: 85269501fe730ed717ed673dbb949f47025462a0
5
+ SHA512:
6
+ metadata.gz: 8041e060c085687c4ca8a11bfc6d893be0495c8620aeba085a83e1d2b294ddecf0844c2c98399b08b9f861b038ed4eea28018f4936b87f2384330ee9dbaaf169
7
+ data.tar.gz: a378f1a8bf10787555ff7ccb3906968b441821356cebccb0b32db0661ff23ca078b267e252b939110fe9125a618f16ed52eb7b98f3aebb5668e4a25587e11cf7
data/.rspec ADDED
@@ -0,0 +1,4 @@
1
+ --backtrace
2
+ --format documentation
3
+ --color
4
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,3 @@
1
+ Layout/EndOfLine:
2
+ Enabled: true
3
+ EnforcedStyle: lf
data/.yardopts ADDED
@@ -0,0 +1,6 @@
1
+ --exclude examples --exclude features --exclude spec
2
+ --no-private
3
+ --markup markdown
4
+ -
5
+ Changelog.md
6
+ License.txt
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # The gem's dependencies are declared in srl_ruby.gemspec; `gemspec` loads them from there
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 TODO: Write your name
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,66 @@
1
+ # SrlRuby
2
+ [![Build Status](https://travis-ci.org/famished-tiger/SRL-Ruby.svg?branch=master)](https://travis-ci.org/famished-tiger/SRL-Ruby)
3
+
4
+ This project implements a [Simple Regex Language](https://simple-regex.com) interpreter in Ruby.
5
+
6
+ ## What is SRL?
7
+ SRL is a small language that lets you write regular expressions
8
+ with a readable syntax that bears some resemblance to English.
9
+ Here are a couple of hyperlinks of interest:
10
+ [Main SRL website](https://simple-regex.com)
11
+ [SRL libraries](https://github.com/SimpleRegex)
12
+
13
+
14
+ ## An example
15
+ Let's assume that we want to create a regular expression that recognizes a time of day in the 24-hour format hh:mm:ss.
16
+ In SRL:
17
+ ```
18
+ digit from 0 to 2, digit,
19
+ (literally ':', digit from 0 to 5, digit) twice
20
+ ```
21
+
22
+ If one runs the `srl_ruby` command-line like this:
23
+
24
+ ```
25
+ srl_ruby "digit from 0 to 2, digit, (literally ':', digit from 0 to 5, digit) twice"
26
+ ```
27
+ It replies with:
28
+ ```
29
+ SRL input: digit from 0 to 2, digit, (literally ':', digit from 0 to 5, digit) twice
30
+ Resulting Regexp: /[0-2]\d(?::[0-5]\d){2}/
31
+ ```
32
+
33
+ In other words, it translates a readable SRL expression into its regexp equivalent.
34
+
35
+ ## Installation
36
+
37
+ Add this line to your application's Gemfile:
38
+
39
+ ```ruby
40
+ gem 'srl_ruby'
41
+ ```
42
+
43
+ And then execute:
44
+
45
+ $ bundle
46
+
47
+ Or install it yourself as:
48
+
49
+ $ gem install srl_ruby
50
+
51
+ ## Usage
52
+
53
+ TODO: Write usage instructions here
54
+
55
+
56
+ ## Contributing
57
+
58
+ Bug reports and pull requests are welcome on GitHub at https://github.com/famished-tiger/SRL-Ruby. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
59
+
60
+ ## License
61
+
62
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
63
+
64
+ ## Code of Conduct
65
+
66
+ Everyone interacting in the SrlRuby project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/famished-tiger/SRL-Ruby/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ desc 'Run RSpec'
5
+ RSpec::Core::RakeTask.new do |spec|
6
+ spec.pattern = 'spec/**/*_spec.rb'
7
+ end
8
+
9
+
10
+ # The 'test' task has the RSpec 'spec' task as its only prerequisite
11
+ desc 'Run tests, with RSpec'
12
+ task test: [:spec]
13
+
14
+
15
+ # Default rake task: a bare `rake` invocation runs the 'test' task
16
+ task default: :test
data/bin/srl_ruby ADDED
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/srl_ruby/tokenizer'
3
+ require_relative '../lib/srl_ruby/grammar'
4
+ require_relative '../lib/srl_ruby/ast_builder'
5
+
6
+ def print_title(aTitle)
7
+ puts aTitle
8
+ puts '=' * aTitle.size
9
+ end
10
+
11
+ # When no SRL expression is given on the command-line, print the usage text and exit with status 1
12
+ if ARGV.empty?
13
+ my_name = File.basename(__FILE__)
14
+ msg = <<-END_MSG
15
+ Simple Regex Language parser:
16
+ - Parses a subset of the SRL language and displays the parse tree
17
+
18
+ Command-line syntax:
19
+ ruby #{my_name} "SRL expression"
20
+ where:
21
+ the SRL expression is enclosed between double quotes (")
22
+
23
+ Examples:
24
+ ruby #{my_name} "letter from a to f exactly 4 times"
25
+ ruby #{my_name} "uppercase letter between 2 and 3 times"
26
+ ruby #{my_name} "digit from 0 to 7 once or more"
27
+ END_MSG
28
+ puts msg
29
+ exit(1)
30
+ end
31
+ puts 'SRL input: ' + ARGV[0]
32
+
33
+ # Create a Rley facade object
34
+ engine = Rley::Engine.new
35
+
36
+ ########################################
37
+ # Step 1. Load the SRL grammar, then tokenize and parse the command-line expression
38
+ engine.use_grammar(SrlRuby::Grammar)
39
+
40
+ lexer = SrlRuby::Tokenizer.new(ARGV[0])
41
+ result = engine.parse(lexer.tokens)
42
+
43
+ unless result.success?
44
+ # Stop if the parse failed: report the reason and exit with status 1
45
+ puts "Parsing of '#{ARGV[0]}' failed"
46
+ puts "Reason: #{result.failure_reason.message}"
47
+ exit(1)
48
+ end
49
+
50
+ # Generate an abstract syntax tree (AST) from the parse result
51
+ engine.configuration.repr_builder = SrlRuby::ASTBuilder
52
+ ast_ptree = engine.convert(result)
53
+
54
+ # Now output the regexp literal (the text returned by the AST root's to_str, between slashes)
55
+ root = ast_ptree.root
56
+ puts "Resulting Regexp: /#{root.to_str}/" # Output the expression result
57
+
58
+ # End of file
@@ -0,0 +1,35 @@
1
+ # File: abstract_method.rb
2
+
3
+ # Mix-in module. Provides the method 'abstract_method' that raises an exception
4
+ # with an appropriate message when called.
5
+ module AbstractMethod
6
+ # Call this method in the body of your abstract methods.
7
+ # Example:
8
+ # require 'AbstractMethod'
9
+ # class SomeClass
10
+ # include AbstractMethod # To add the behaviour from the mix-in module AbstractMethod
11
+ # ...
12
+ # Consider that SomeClass has an abstract method called 'some_method'
13
+ #
14
+ # def some_method() abstract_method
15
+ # end
16
+ def abstract_method()
17
+ # Determine the short class name of self
18
+ className = self.class.name.split(/::/).last
19
+
20
+ # Retrieve the top text line of the call stack
21
+ top_line = caller(1..1)
22
+
23
+ # Extract the calling method name
24
+ callerNameInQuotes = top_line.scan(/`.+?$/).first
25
+ callerName = callerNameInQuotes.gsub(/`|'/, '') # Remove enclosing quotes
26
+
27
+ # Build the error message
28
+ prefix = "The method #{className}##{callerName} is abstract."
29
+ suffix = " It should be implemented in subclasses of #{className}."
30
+ error_message = prefix + suffix
31
+ raise NotImplementedError, error_message
32
+ end
33
+ end # module
34
+
35
+ # End of file
@@ -0,0 +1,27 @@
1
+ # File: alternation.rb
2
+
3
+ require_relative 'polyadic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+ # An n-ary matching operator: the alternation of its child expressions.
7
+ # It succeeds when one child expression succeeds to match the subject text
8
+ class Alternation < PolyadicExpression
9
+ # Constructor. Takes any number of child expressions (the alternatives) and passes them as one Array to the superclass.
10
+ def initialize(*theChildren)
11
+ super(theChildren)
12
+ end
13
+
14
+ protected
15
+
16
+ # Conversion method re-definition.
17
+ # Purpose: Return the String representation of the alternatives: each child's to_str, joined with '|', inside '(?:' ... ')'.
18
+ def text_repr()
19
+ result_children = children.map(&:to_str)
20
+ result = '(?:' + result_children.join('|') + ')'
21
+
22
+ return result
23
+ end
24
+ end # class
25
+ end # module
26
+
27
+ # End of file
@@ -0,0 +1,45 @@
1
+ # File: anchor.rb
2
+
3
+ require_relative 'atomic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+ # An anchor is a zero-width assertion based on the current position.
7
+ class Anchor < AtomicExpression
8
+ # A Hash for converting a lexeme to a symbolic value
9
+ AnchorToSymbol = {
10
+ # Lexeme => Symbol value
11
+ '^' => :soLine, # Start of line
12
+ '$' => :eoLine, # End of line
13
+ '\A' => :soSubject,
14
+ '\b' => :wordBoundary,
15
+ '\B' => :nonAtWordBoundary,
16
+ '\G' => :firstMatch,
17
+ '\z' => :eoSubject,
18
+ '\Z' => :eoSubjectOrBeforeNLAtEnd
19
+ }.freeze
20
+
21
+ # A symbolic value that identifies the type of assertion to perform
22
+ attr_reader(:kind)
23
+
24
+ # Constructor
25
+ # @param aKind [String] Lexeme representation of the anchor
26
+ def initialize(aKind)
27
+ @kind = valid_kind(aKind)
28
+ end
29
+
30
+ # Conversion method re-definition.
31
+ # Purpose: Return the String representation of the expression.
32
+ def to_str()
33
+ return AnchorToSymbol.rassoc(kind).first
34
+ end
35
+
36
+ private
37
+
38
+ # Return the symbolic value corresponding to the given lexeme.
39
+ def valid_kind(aKind)
40
+ return AnchorToSymbol[aKind]
41
+ end
42
+ end # class
43
+ end # module
44
+
45
+ # End of file
@@ -0,0 +1,16 @@
1
+ # File: atomic_expression.rb
2
+
3
+ require_relative 'expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+ # Abstract class. A valid regular expression that
7
+ # cannot be further decomposed into sub-expressions.
8
+ class AtomicExpression < Expression
9
+ # Redefined method. Always returns true: an atomic expression has no child expression.
10
+ def atomic?
11
+ return true
12
+ end
13
+ end # class
14
+ end # module
15
+
16
+ # End of file
@@ -0,0 +1,51 @@
1
+ # File: capturing_group.rb
2
+
3
+ require_relative 'monadic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+ # An association between a capture variable and a sub-expression:
7
+ # the text matched by the sub-expression is assigned to the capture variable.
8
+ class CapturingGroup < MonadicExpression
9
+ # The capture variable id. It is an Integer when the capture group gets
10
+ # a sequence number,
11
+ # a String when it is a user-defined name
12
+ attr_reader(:id)
13
+
14
+ # When true, then capturing group forbids backtracking requests from its parent
15
+ # expression.
16
+ attr_reader(:no_backtrack)
17
+
18
+ # Constructor.
19
+ # [aChildExpression] A sub-expression to match. When successful
20
+ # the matching text is assigned to the capture variable.
21
+ # [theId] The id of the capture variable (nil by default).
22
+ # [noBacktrack] A flag that specifies whether the capturing group forbids
23
+ # backtracking requests from its parent expression (false by default).
24
+ def initialize(aChildExpression, theId = nil, noBacktrack = false)
25
+ super(aChildExpression)
26
+ @id = theId
27
+ @no_backtrack = noBacktrack
28
+ end
29
+
30
+ # Return true iff the capturing group has a name, i.e. its id is a String (and not a sequence number)
31
+ def named?()
32
+ return id.kind_of?(String)
33
+ end
34
+
35
+ # Conversion method re-definition.
36
+ # Purpose: Return the String representation of the captured expression: '(' + '?>' if no_backtrack + '?<id>' if named + child text + ')'.
37
+ def to_str()
38
+ prefix = named? ? "?<#{id}>" : ''
39
+ atomic = no_backtrack ? '?>' : ''
40
+ if child.is_a?(Regex::NonCapturingGroup)
41
+ # Minor optimization: skip the non-capturing group and emit the text of its own child directly
42
+ result = '(' + atomic + prefix + child.child.to_str + ')'
43
+ else
44
+ result = '(' + atomic + prefix + child.to_str + ')'
45
+ end
46
+ return result
47
+ end
48
+ end # class
49
+ end # module
50
+
51
+ # End of file
@@ -0,0 +1,38 @@
1
+ # File: char_class.rb
2
+
3
+ require_relative 'polyadic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+ # A character class: an n-ary matching operator.
7
+ # It succeeds when one child expression succeeds to match the subject text.
8
+ class CharClass < PolyadicExpression
9
+ # Codepoints of the characters with special meaning in character classes: ']', '\', '^' and '-'
10
+ Metachars = ']\^-'.codepoints
11
+ # A flag that indicates whether the character class is negated
12
+ attr_reader(:negated)
13
+
14
+ # Constructor. to_negate: the negation flag; theChildren: the member expressions, passed as one Array to the superclass.
15
+ def initialize(to_negate, *theChildren)
16
+ super(theChildren)
17
+ @negated = to_negate
18
+ end
19
+
20
+ protected
21
+
22
+ # Conversion method re-definition.
23
+ # Purpose: Return the String representation of the character class: '[' + '^' if negated + the children's text + ']'.
24
+ def text_repr()
25
+ result_children = children.inject('') do |subResult, aChild|
26
+ if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
27
+ subResult << "\\" # Escape meta-character...
28
+ end
29
+ subResult << aChild.to_str
30
+ end
31
+ result = '[' + (negated ? '^' : '') + result_children + ']'
32
+
33
+ return result
34
+ end
35
+ end # class
36
+ end # module
37
+
38
+ # End of file
@@ -0,0 +1,51 @@
1
+ # File: char_range.rb
2
+
3
+ require_relative 'polyadic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+ # A binary expression that represents a contiguous range of characters.
7
+ # Assumption: characters are ordered by codepoint
8
+ class CharRange < PolyadicExpression
9
+ # Constructor. Raises StandardError when the lower bound is greater than the upper bound.
10
+ # [theLowerBound]
11
+ # A character that will be the lower bound value for the range.
12
+ # [theUpperBound]
13
+ # A character that will be the upper bound value for the range.
14
+ # TODO: optimisation. Build a Character if lower bound == upper bound.
15
+ def initialize(theLowerBound, theUpperBound)
16
+ range = validated_range(theLowerBound, theUpperBound)
17
+ super(range)
18
+ end
19
+
20
+ # Return the lower bound of the range (the first child).
21
+ def lower()
22
+ return children.first
23
+ end
24
+
25
+ # Return the upper bound of the range (the last child).
26
+ def upper()
27
+ return children.last
28
+ end
29
+
30
+ protected
31
+
32
+ # Conversion method re-definition.
33
+ # Purpose: Return the String representation of the range: lower bound, '-', upper bound.
34
+ def text_repr()
35
+ result = lower.to_str + '-' + upper.to_str
36
+
37
+ return result
38
+ end
39
+
40
+ private
41
+
42
+ # Validation method. Returns the couple [lower bound, upper bound] after comparing their codepoints; raises StandardError otherwise.
43
+ def validated_range(theLowerBound, theUpperBound)
44
+ msg = 'Character range error: lower bound is greater than upper bound.'
45
+ raise StandardError, msg if theLowerBound.codepoint > theUpperBound.codepoint
46
+ return [theLowerBound, theUpperBound]
47
+ end
48
+ end # class
49
+ end # module
50
+
51
+ # End of file
@@ -0,0 +1,50 @@
1
+ # File: char_shorthand.rb
2
+
3
+ require_relative 'atomic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+ # A pre-defined character class is in essence a name for a built-in, standard character class.
7
+ class CharShorthand < AtomicExpression
8
+ # A constant Hash that defines all the predefined character shorthands.
9
+ # It contains pairs of the form:
10
+ # a pre-defined character shorthand letter => the equivalent character class, as a String
11
+ StandardCClasses = {
12
+ 'd' => '[0-9]',
13
+ 'D' => '[^0-9]',
14
+ 'h' => '[0-9a-fA-F]',
15
+ 'H' => '[^0-9a-fA-F]',
16
+ 's' => '[ \t\r\n\f]',
17
+ 'S' => '[^ \t\r\n\f]',
18
+ 'w' => '[0-9a-zA-Z_]',
19
+ 'W' => '[^0-9a-zA-Z_]'
20
+ }.freeze
21
+
22
+ # A one-letter abbreviation
23
+ attr_reader(:shortname)
24
+
25
+ # Constructor. aShortname must be one of the keys of StandardCClasses, otherwise StandardError is raised.
26
+ def initialize(aShortname)
27
+ @shortname = valid_shortname(aShortname)
28
+ end
29
+
30
+ protected
31
+
32
+ # Conversion method re-definition.
33
+ # Purpose: Return the String representation of the expression (a backslash followed by the short name).
34
+ def text_repr()
35
+ return "\\#{shortname}"
36
+ end
37
+
38
+ private
39
+
40
+ # Return the validated short name; raise StandardError when it is not a key of StandardCClasses.
41
+ def valid_shortname(aShortname)
42
+ msg = "Unknown predefined character class \\#{aShortname}"
43
+ raise StandardError, msg unless StandardCClasses.include? aShortname
44
+
45
+ return aShortname
46
+ end
47
+ end # class
48
+ end # module
49
+
50
+ # End of file