RubyGems - regexp-examples - Versions diffs - 0.5.4 → 0.6.0 - Mend

regexp-examples 0.5.4 → 0.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/README.md +2 -2
data/lib/regexp-examples/chargroup_parser.rb +70 -0
data/lib/regexp-examples/constants.rb +33 -12
data/lib/regexp-examples/groups.rb +2 -58
data/lib/regexp-examples/parser.rb +7 -3
data/lib/regexp-examples/version.rb +1 -1
data/spec/regexp-examples_spec.rb +32 -7
metadata +2 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 749811525558a012bc616a34cc04f017270d04e8
-  data.tar.gz: 31de4f7cbd8f7aed68d2c4efd370cfdc85f9bd0f
+  metadata.gz: 062a1310c8b7c861a7724fd75745c1e9bff9257f
+  data.tar.gz: b05ce36dbb3c0afee079091d5c1016a429f1d099
 SHA512:
-  metadata.gz: fbc3bf4111f29daec1c550dbdf03fc97fcb3fde5069a7a0259547d5df0e47972e3a112473f2a1a9c95788ec8a39c885b71a19d1b22717d15e674b71864d55020
-  data.tar.gz: 6082e8ed18458b1eca2c47b18cc3e3484a8cb93c1111215f3565578dada06fc2deb388fb3813cae24343c6e2d1c41cdc377359d87876bc14e8dc53ffa30b700d
+  metadata.gz: 5519ec6e710a257c165b35f6b4138bc03d37973c31fcb4a7c6091713ab347438dd8e8bbde722c5ab21f63d6ede0565be2b77de0747d42f9f1e83413312b71d14
+  data.tar.gz: 58d67c5e25de2dbd238cb53a192c784047b8ab7e8a836d87c47c9cf5316133d3383a8fdec0a7c9fe07eec0518266b3c66a66c8953e830277b41e1bf9ca53e525

data/README.md CHANGED Viewed

@@ -43,6 +43,7 @@ For more detail on this, see [configuration options](#configuration-options).
 * Escape sequences, e.g. `/\x42/`, `/\x5word/`, `/#{"\x80".force_encoding("ASCII-8BIT")}/`
 * Unicode characters, e.g. `/\u0123/`, `/\uabcd/`, `/\u{789}/`
 * Octal characters, e.g. `/\10/`, `/\177/`
+* POSIX bracket expressions (including negation), e.g. `/[[:alnum:]]/`, `/[[:^space:]]/`
 * **Arbitrarily complex combinations of all the above!**
 * Regexp options can also be used:
@@ -54,14 +55,13 @@ For more detail on this, see [configuration options](#configuration-options).
 ## Bugs and Not-Yet-Supported syntax
 * Nested character classes, and the use of set intersection ([See here](http://www.ruby-doc.org/core-2.2.0/Regexp.html#class-Regexp-label-Character+Classes) for the official documentation on this.) For example:
-  * `/[[abc]]/.examples`  (which _should_ return `["a", "b", "c"]`)
+  * `/[[abc]de]/.examples`  (which _should_ return `["a", "b", "c", "d", "e"]`)
   * `/[[a-d]&&[c-f]]/.examples` (which _should_ return: `["c", "d"]`)
 * Conditional capture groups, such as `/(group1) (?(1)yes|no)`
 Using any of the following will raise a RegexpExamples::UnsupportedSyntax exception (until such time as they are implemented!):
-* POSIX bracket expressions, e.g. `/[[:alnum:]]/`, `/[[:space:]]/`
 * Named properties, e.g. `/\p{L}/` ("Letter"), `/\p{Arabic}/` ("Arabic character"), `/\p{^Ll}/` ("Not a lowercase letter")
 * Subexpression calls, e.g. `/(?<name> ... \g<name>* )/` (Note: These could get _really_ ugly to implement, and may even be impossible, so I highly doubt it's worth the effort!)

data/lib/regexp-examples/chargroup_parser.rb ADDED Viewed

@@ -0,0 +1,70 @@
+module RegexpExamples
+  # Given an array of chars from inside a character set,
+  # Interprets all backslashes, ranges and negations
+  # TODO: This needs a bit of a rewrite because:
+  #   A) It's ugly
+  #   B) It doesn't take into account nested character groups, or set intersection
+  # To achieve this, the algorithm needs to be recursive, like the main Parser.
+  class ChargroupParser
+    def initialize(chars)
+      @chars = chars
+      if @chars[0] == "^"
+        @negative = true
+        @chars = @chars[1..-1]
+      else
+        @negative = false
+      end
+      init_backslash_chars
+      init_ranges
+    end
+    def result
+      @negative ? (CharSets::Any - @chars) : @chars
+    end
+    private
+    def init_backslash_chars
+      @chars.each_with_index do |char, i|
+        if char == "\\"
+          if BackslashCharMap.keys.include?(@chars[i+1])
+            @chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
+          elsif @chars[i+1] == 'b'
+            @chars[i..i+1] = "\b"
+          elsif @chars[i+1] == "\\"
+            @chars.delete_at(i+1)
+          else
+            @chars.delete_at(i)
+          end
+        end
+      end
+    end
+    def init_ranges
+      # remove hyphen ("-") from front/back, if present
+      hyphen = nil
+      hyphen = @chars.shift if @chars.first == "-"
+      hyphen ||= @chars.pop if @chars.last == "-"
+      # Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
+      while i = @chars.index("-")
+        # Prevent infinite loops from expanding [",", "-", "."] to itself
+        # (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
+        if (@chars[i-1] == ',' && @chars[i+1] == '.')
+          hyphen = @chars.delete_at(i)
+        else
+          @chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
+        end
+      end
+      # restore hyphen, if stripped out earlier
+      @chars.unshift(hyphen) if hyphen
+    end
+    def move_backslash_to_front(chars)
+      if index = chars.index { |char| char == '\\' }
+        chars.unshift chars.delete_at(index)
+      end
+      chars
+    end
+  end
+end

data/lib/regexp-examples/constants.rb CHANGED Viewed

@@ -32,21 +32,25 @@ module RegexpExamples
   end
   module CharSets
-    Lower      = Array('a'..'z')
-    Upper      = Array('A'..'Z')
-    Digit      = Array('0'..'9')
-    Punct      = [33..47, 58..64, 91..96, 123..126].map { |r| r.map { |val| val.chr } }.flatten
-    Hex        = Array('a'..'f') | Array('A'..'F') | Digit
-    Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"]
-    Any        = Lower | Upper | Digit | Punct
-  end
+    Lower        = Array('a'..'z')
+    Upper        = Array('A'..'Z')
+    Digit        = Array('0'..'9')
+    Punct        = %w(! " # % & ' ( ) * , - . / : ; ? @ [ \\ ] _ { })
+    Hex          = Array('a'..'f') | Array('A'..'F') | Digit
+    Word         = Lower | Upper | Digit | ['_']
+    Whitespace   = [' ', "\t", "\n", "\r", "\v", "\f"]
+    Control      = (0..31).map(&:chr) | ["\x7f"]
+    # Ensure that the "common" characters appear first in the array
+    Any          = Lower | Upper | Digit | Punct | (0..127).map(&:chr)
+    AnyNoNewLine = Any - ["\n"]
+  end.freeze
   # Map of special regex characters, to their associated character sets
   BackslashCharMap = {
     'd' => CharSets::Digit,
-    'D' => CharSets::Lower | CharSets::Upper | CharSets::Punct,
-    'w' => CharSets::Lower | CharSets::Upper | CharSets::Digit | ['_'],
-    'W' => CharSets::Punct.reject { |val| val == '_' },
+    'D' => CharSets::Any - CharSets::Digit,
+    'w' => CharSets::Word,
+    'W' => CharSets::Any - CharSets::Word,
     's' => CharSets::Whitespace,
     'S' => CharSets::Any - CharSets::Whitespace,
     'h' => CharSets::Hex,
@@ -59,6 +63,23 @@ module RegexpExamples
     'a' => ["\a"], # alarm
     'v' => ["\v"], # vertical tab
     'e' => ["\e"], # escape
-  }
+  }.freeze
+  POSIXCharMap = {
+    'alnum'  => CharSets::Upper | CharSets::Lower | CharSets::Digit,
+    'alpha'  => CharSets::Upper | CharSets::Lower,
+    'blank'  => [" ", "\t"],
+    'cntrl'  => CharSets::Control,
+    'digit'  => CharSets::Digit,
+    'graph'  => (CharSets::Any - CharSets::Control) - [" "], #  Visible chars
+    'lower'  => CharSets::Lower,
+    'print'  => CharSets::Any - CharSets::Control,
+    'punct'  => CharSets::Punct,
+    'space'  => CharSets::Whitespace,
+    'upper'  => CharSets::Upper,
+    'xdigit' => CharSets::Hex,
+    'word'   => CharSets::Word,
+    'ascii'  => CharSets::Any
+  }.freeze
 end

data/lib/regexp-examples/groups.rb CHANGED Viewed

@@ -63,69 +63,14 @@ module RegexpExamples
     def initialize(chars, ignorecase)
       @chars = chars
       @ignorecase = ignorecase
-      if chars[0] == "^"
-        @negative = true
-        @chars = @chars[1..-1]
-      else
-        @negative = false
-      end
-      init_backslash_chars
-      init_ranges
-    end
-    def init_ranges
-      # save first and last "-" if present
-      first = nil
-      last = nil
-      first = @chars.shift if @chars.first == "-"
-      last = @chars.pop if @chars.last == "-"
-      # Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
-      while i = @chars.index("-")
-        # Prevent infinite loops from expanding [",", "-", "."] to itself
-        # (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
-        if (@chars[i-1] == ',' && @chars[i+1] == '.')
-          first = '-'
-          @chars.delete_at(i)
-        else
-          @chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
-        end
-      end
-      # restore them back
-      @chars.unshift(first) if first
-      @chars.push(last) if last
-    end
-    def init_backslash_chars
-      @chars.each_with_index do |char, i|
-        if char == "\\"
-          if BackslashCharMap.keys.include?(@chars[i+1])
-            @chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
-          elsif @chars[i+1] == 'b'
-            @chars[i..i+1] = "\b"
-          elsif @chars[i+1] == "\\"
-            @chars.delete_at(i+1)
-          else
-            @chars.delete_at(i)
-          end
-        end
-      end
     end
     def result
-      (@negative ? (CharSets::Any - @chars) : @chars).map do |result|
+      @chars.map do |result|
         GroupResult.new(result)
       end
     end
-    private
-    def move_backslash_to_front(chars)
-      if index = chars.index { |char| char == '\\' }
-        chars.unshift chars.delete_at(index)
-      end
-      chars
-    end
   end
   class DotGroup
@@ -135,8 +80,7 @@ module RegexpExamples
     end
     def result
-      chars = CharSets::Any
-      chars = (["\n"] | chars) if multiline
+      chars = multiline ? CharSets::Any : CharSets::AnyNoNewLine
       chars.map do |result|
         GroupResult.new(result)
       end

data/lib/regexp-examples/parser.rb CHANGED Viewed

@@ -218,8 +218,11 @@ module RegexpExamples
     end
     def parse_char_group
-      if rest_of_string =~ /\A\[\[:[^:]+:\]\]/
-        raise UnsupportedSyntaxError, "POSIX bracket expressions are not yet implemented"
+      # TODO: Extract all this logic into ChargroupParser
+      if rest_of_string =~ /\A\[\[:(\^?)([^:]+):\]\]/
+        @current_position += (6 + $1.length + $2.length)
+        chars = $1.empty? ? POSIXCharMap[$2] : CharSets::Any - POSIXCharMap[$2]
+        return CharGroup.new(chars, @ignorecase)
       end
       chars = []
       @current_position += 1
@@ -238,7 +241,8 @@ module RegexpExamples
         chars << next_char
         @current_position += 1
       end
-      CharGroup.new(chars, @ignorecase)
+      parsed_chars = ChargroupParser.new(chars).result
+      CharGroup.new(parsed_chars, @ignorecase)
     end
     def parse_dot_group

data/lib/regexp-examples/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module RegexpExamples
-  VERSION = '0.5.4'
+  VERSION = '0.6.0'
 end

data/spec/regexp-examples_spec.rb CHANGED Viewed

@@ -2,10 +2,16 @@ RSpec.describe Regexp, "#examples" do
   def self.examples_exist_and_match(*regexps)
     regexps.each do |regexp|
       it do
-        regexp_examples = regexp.examples
-        expect(regexp_examples).not_to be_empty
+        begin
+          regexp_examples = regexp.examples(max_group_results: 999)
+        rescue
+          # TODO: Find a nicer way to display this?
+          puts "Error generating examples for /#{regexp.source}/"
+          raise $!
+        end
+        expect(regexp_examples).not_to be_empty, "No examples were generated for regexp: /#{regexp.source}/"
         regexp_examples.each { |example| expect(example).to match(/\A(?:#{regexp.source})\z/) }
-        # Note: /\A...\z/ is used, to prevent misleading examples from passing the test.
+        # Note: /\A...\z/ is used to prevent misleading examples from passing the test.
         # For example, we don't want things like:
         # /a*/.examples to include "xyz"
         # /a|b/.examples to include "bad"
@@ -32,7 +38,7 @@ RSpec.describe Regexp, "#examples" do
   def self.examples_are_empty(*regexps)
     regexps.each do |regexp|
       it do
-        expect(regexp.examples).to be_empty
+        expect(regexp.examples).to be_empty, "Unexpected examples for regexp: /#{regexp.source}/"
       end
     end
   end
@@ -181,8 +187,7 @@ RSpec.describe Regexp, "#examples" do
         /\p{L}/,
         /\p{Arabic}/,
         /\p{^Ll}/,
-        /(?<name> ... \g<name>*)/,
-        /[[:space:]]/
+        /(?<name> ... \g<name>*)/
       )
     end
@@ -230,13 +235,33 @@ RSpec.describe Regexp, "#examples" do
       )
     end
-    context "comment group" do
+    context "for comment groups" do
       examples_exist_and_match(
         /a(?#comment)b/,
         /a(?#ugly backslashy\ comment\\\))b/
       )
     end
+    context "for POSIX groups" do
+      examples_exist_and_match(
+        /[[:alnum:]]/,
+        /[[:alpha:]]/,
+        /[[:blank:]]/,
+        /[[:cntrl:]]/,
+        /[[:digit:]]/,
+        /[[:graph:]]/,
+        /[[:lower:]]/,
+        /[[:print:]]/,
+        /[[:punct:]]/,
+        /[[:space:]]/,
+        /[[:upper:]]/,
+        /[[:xdigit:]]/,
+        /[[:word:]]/,
+        /[[:ascii:]]/,
+        /[[:^alnum:]]/ # Negated
+      )
+    end
     context "exact examples match" do
       # More rigorous tests to assert that ALL examples are being listed
       context "default config options" do

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: regexp-examples
 version: !ruby/object:Gem::Version
-  version: 0.5.4
+  version: 0.6.0
 platform: ruby
 authors:
 - Tom Lord
@@ -55,6 +55,7 @@ files:
 - coverage/coverage-badge.png
 - lib/regexp-examples.rb
 - lib/regexp-examples/backreferences.rb
+- lib/regexp-examples/chargroup_parser.rb
 - lib/regexp-examples/constants.rb
 - lib/regexp-examples/core_extensions/regexp/examples.rb
 - lib/regexp-examples/exceptions.rb