RubyGems - pathspec - Versions diffs - 0.0.2 → 1.0.0 - Mend

pathspec 0.0.2 → 1.0.0

Files changed (20) hide show

checksums.yaml +5 -5
data/CHANGELOG.md +28 -1
data/README.md +39 -5
data/bin/pathspec-rb +80 -0
data/docs/html/pathspec-rb.html +82 -0
data/docs/man/pathspec-rb.man.1 +67 -0
data/docs/pathspec-rb.md +64 -0
data/lib/pathspec.rb +19 -27
data/lib/pathspec/gitignorespec.rb +245 -243
data/lib/pathspec/regexspec.rb +14 -10
data/lib/pathspec/spec.rb +15 -9
data/spec/files/gitignore_readme +2 -0
data/spec/files/gitignore_ruby +51 -0
data/spec/files/gitignore_simple +1 -0
data/spec/files/regex_simple +1 -0
data/spec/spec_helper.rb +6 -0
data/spec/unit/pathspec/gitignorespec_spec.rb +303 -0
data/spec/unit/pathspec/spec_spec.rb +12 -0
data/spec/unit/pathspec_spec.rb +386 -0
metadata +111 -21

data/lib/pathspec/gitignorespec.rb CHANGED

@@ -1,151 +1,159 @@
-# encoding: utf-8
 require 'pathspec/regexspec'
-class GitIgnoreSpec < RegexSpec
-  attr_reader :regex
-  def initialize(pattern)
-    pattern = pattern.strip unless pattern.nil?
-    # A pattern starting with a hash ('#') serves as a comment
-    # (neither includes nor excludes files). Escape the hash with a
-    # back-slash to match a literal hash (i.e., '\#').
-    if pattern.start_with?('#')
-      @regex = nil
-      @inclusive = nil
-    # A blank pattern is a null-operation (neither includes nor
-    # excludes files).
-    elsif pattern.empty?
-      @regex = nil
-      @inclusive = nil
-    # Patterns containing three or more consecutive stars are invalid and
-    # will be ignored.
-    elsif pattern =~ /\*\*\*+/
-      @regex = nil
-      @inclusive = nil
-    # We have a valid pattern!
-    else
-      # A pattern starting with an exclamation mark ('!') negates the
-      # pattern (exclude instead of include). Escape the exclamation
-      # mark with a back-slash to match a literal exclamation mark
-      # (i.e., '\!').
-      if pattern.start_with?('!')
-        @inclusive = false
-        # Remove leading exclamation mark.
-        pattern = pattern[1..-1]
+class PathSpec
+  # Class for parsing a .gitignore spec
+  class GitIgnoreSpec < RegexSpec
+    attr_reader :regex, :pattern
+    def initialize(original_pattern) # rubocop:disable Metrics/CyclomaticComplexity
+      pattern = original_pattern.strip unless original_pattern.nil?
+      # A pattern starting with a hash ('#') serves as a comment
+      # (neither includes nor excludes files). Escape the hash with a
+      # back-slash to match a literal hash (i.e., '\#').
+      if pattern.start_with?('#')
+        @regex = nil
+        @inclusive = nil
+        # A blank pattern is a null-operation (neither includes nor
+        # excludes files).
+      elsif pattern.empty? # rubocop:disable Lint/DuplicateBranch
+        @regex = nil
+        @inclusive = nil
+        # Patterns containing three or more consecutive stars are invalid and
+        # will be ignored.
+      elsif /\*\*\*+/.match?(pattern) # rubocop:disable Lint/DuplicateBranch
+        @regex = nil
+        @inclusive = nil
+        # EDGE CASE: According to `git check-ignore` (v2.4.1), a single '/'
+        # does not match any file.
+      elsif pattern == '/' # rubocop:disable Lint/DuplicateBranch
+        @regex = nil
+        @inclusive = nil
+        # We have a valid pattern!
       else
-        @inclusive = true
-      end
-      # Remove leading back-slash escape for escaped hash ('#') or
-      # exclamation mark ('!').
-      if pattern.start_with?('\\')
-        pattern = pattern[1..-1]
-      end
-      # Split pattern into segments. -1 to allow trailing slashes.
-      pattern_segs = pattern.split('/', -1)
-      # Normalize pattern to make processing easier.
-      # A pattern beginning with a slash ('/') will only match paths
-      # directly on the root directory instead of any descendant
-      # paths. So, remove empty first segment to make pattern relative
-      # to root.
-      if pattern_segs[0].empty?
-        pattern_segs.shift
-      else
-        # A pattern without a beginning slash ('/') will match any
-        # descendant path. This is equivilent to "**/{pattern}". So,
-        # prepend with double-asterisks to make pattern relative to
-        # root.
-        if pattern_segs.length == 1 && pattern_segs[0] != '**'
-          pattern_segs.insert(0, '**')
+        # A pattern starting with an exclamation mark ('!') negates the
+        # pattern (exclude instead of include). Escape the exclamation
+        # mark with a back-slash to match a literal exclamation mark
+        # (i.e., '\!').
+        if pattern.start_with?('!')
+          @inclusive = false
+          # Remove leading exclamation mark.
+          pattern = pattern[1..]
+        else
+          @inclusive = true
         end
-      end
-      # A pattern ending with a slash ('/') will match all descendant
-      # paths of if it is a directory but not if it is a regular file.
-      # This is equivilent to "{pattern}/**". So, set last segment to
-      # double asterisks to include all descendants.
-      if pattern_segs[-1].empty?
-        pattern_segs[-1] = '**'
-      end
-      # Handle platforms with backslash separated paths
-      if File::SEPARATOR == '\\'
-        path_sep = '\\\\'
-      else
-        path_sep = '/'
-      end
+        # Remove leading back-slash escape for escaped hash ('#') or
+        # exclamation mark ('!').
+        pattern = pattern[1..] if pattern.start_with?('\\')
+        # Split pattern into segments. -1 to allow trailing slashes.
+        pattern_segs = pattern.split('/', -1)
+        # Normalize pattern to make processing easier.
+        # A pattern beginning with a slash ('/') will only match paths
+        # directly on the root directory instead of any descendant
+        # paths. So, remove empty first segment to make pattern relative
+        # to root.
+        if pattern_segs[0].empty?
+          pattern_segs.shift
+        elsif pattern_segs.length == 1 ||
+              pattern_segs.length == 2 && pattern_segs[-1].empty?
+          # A pattern without a beginning slash ('/') will match any
+          # descendant path. This is equivalent to "**/{pattern}". So,
+          # prepend with double-asterisks to make pattern relative to
+          # root.
+          # EDGE CASE: This also holds for a single pattern with a
+          # trailing slash (e.g. dir/).
+          pattern_segs.insert(0, '**') if pattern_segs[0] != '**'
+        end
+        # A pattern ending with a slash ('/') will match all descendant
+        # paths if it is a directory, but not if it is a regular file.
+        # This is equivalent to "{pattern}/**". So, set last segment to
+        # double asterisks to include all descendants.
+        pattern_segs[-1] = '**' if pattern_segs[-1].empty? && pattern_segs.length > 1
+        # Handle platforms with backslash separated paths
+        path_sep = if File::SEPARATOR == '\\'
+                     '\\\\'
+                   else
+                     '/'
+                   end
+        # Build regular expression from pattern.
+        regex = '^'
+        need_slash = false
+        regex_end = pattern_segs.size - 1
+        pattern_segs.each_index do |i|
+          seg = pattern_segs[i]
+          case seg
+          when '**'
+            # A pattern consisting solely of double-asterisks ('**')
+            # will match every path.
+            if i == 0 && i == regex_end
+              regex.concat('.+')
+              # A normalized pattern beginning with double-asterisks
+              # ('**') will match any leading path segments.
+            elsif i == 0
+              regex.concat("(?:.+#{path_sep})?")
+              need_slash = false
+              # A normalized pattern ending with double-asterisks ('**')
+              # will match any trailing path segments.
+            elsif i == regex_end
+              regex.concat("#{path_sep}.*")
+              # A pattern with inner double-asterisks ('**') will match
+              # multiple (or zero) inner path segments.
+            else
+              regex.concat("(?:#{path_sep}.+)?")
+              need_slash = true
+            end
+            # Match single path segment.
+          when '*'
+            regex.concat(path_sep) if need_slash
+            regex.concat("[^#{path_sep}]+")
+            need_slash = true
-      # Build regular expression from pattern.
-      regex = '^'
-      need_slash = false
-      regex_end = pattern_segs.size - 1
-      pattern_segs.each_index do |i|
-        seg = pattern_segs[i]
-        if seg == '**'
-          # A pattern consisting solely of double-asterisks ('**')
-          # will match every path.
-          if i == 0 && i == regex_end
-            regex.concat('.+')
-          # A normalized pattern beginning with double-asterisks
-          # ('**') will match any leading path segments.
-          elsif i == 0
-            regex.concat("(?:.+#{path_sep})?")
-            need_slash = false
-          # A normalized pattern ending with double-asterisks ('**')
-          # will match any trailing path segments.
-          elsif i == regex_end
-            regex.concat("#{path_sep}.*")
-          # A pattern with inner double-asterisks ('**') will match
-          # multiple (or zero) inner path segments.
           else
-            regex.concat("(?:#{path_sep}.+)?")
-            need_slash = true
-          end
+            # Match segment glob pattern.
+            regex.concat(path_sep) if need_slash
-        # Match single path segment.
-        elsif seg == '*'
-          if need_slash
-            regex.concat(path_sep)
-          end
+            regex.concat(translate_segment_glob(seg))
-          regex.concat("[^#{path_sep}]+")
-          need_slash = true
+            if i == regex_end && @inclusive
+              # A pattern ending without a slash ('/') will match a file
+              # or a directory (with paths underneath it).
+              # e.g. foo matches: foo, foo/bar, foo/bar/baz, etc.
+              # EDGE CASE: However, this does not hold for exclusion cases
+              # according to `git check-ignore` (v2.4.1).
+              regex.concat("(?:#{path_sep}.*)?")
+            end
-        else
-          # Match segment glob pattern.
-          if need_slash
-            regex.concat(path_sep)
+            need_slash = true
           end
-          regex.concat(translate_segment_glob(seg))
-          need_slash = true
         end
-      end
-      regex.concat('$')
-      super(regex)
-    end
-  end
+        regex.concat('$')
+        super(regex)
-  def match(path)
-    super(path)
-  end
+        # Copy original pattern
+        @pattern = original_pattern.dup
+      end
+    end
-  def translate_segment_glob(pattern)
-    """
+    def translate_segment_glob(pattern)
+      ''"
     Translates the glob pattern to a regular expression. This is used in
     the constructor to translate a path segment glob pattern to its
     corresponding regular expression.
@@ -153,123 +161,117 @@ class GitIgnoreSpec < RegexSpec
     *pattern* (``str``) is the glob pattern.
     Returns the regular expression (``str``).
-    """
-    # NOTE: This is derived from `fnmatch.translate()` and is similar to
-    # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
-    escape = false
-    regex = ''
-    i = 0
-    while i < pattern.size
-      # Get next character.
-      char = pattern[i].chr
-      i += 1
-      # Escape the character.
-      if escape
-        escape = false
-        regex += Regexp.escape(char)
-      # Escape character, escape next character.
-      elsif char == '\\'
-        escape = true
-      # Multi-character wildcard. Match any string (except slashes),
-      # including an empty string.
-      elsif char == '*'
-        regex += '[^/]*'
-      # Single-character wildcard. Match any single character (except
-      # a slash).
-      elsif char == '?'
-        regex += '[^/]'
-      # Braket expression wildcard. Except for the beginning
-      # exclamation mark, the whole braket expression can be used
-      # directly as regex but we have to find where the expression
-      # ends.
-      # - "[][!]" matchs ']', '[' and '!'.
-      # - "[]-]" matchs ']' and '-'.
-      # - "[!]a-]" matchs any character except ']', 'a' and '-'.
-      elsif char == '['
-        j = i
-        # Pass brack expression negation.
-        if j < pattern.size && pattern[j].chr == '!'
-          j += 1
-        end
-        # Pass first closing braket if it is at the beginning of the
-        # expression.
-        if j < pattern.size && pattern[j].chr == ']'
-          j += 1
-        end
-        # Find closing braket. Stop once we reach the end or find it.
-        while j < pattern.size && pattern[j].chr != ']'
-          j += 1
-        end
-        if j < pattern.size
-          expr = '['
-          # Braket expression needs to be negated.
-          if pattern[i].chr == '!'
-            expr += '^'
-            i += 1
-          # POSIX declares that the regex braket expression negation
-          # "[^...]" is undefined in a glob pattern. Python's
-          # `fnmatch.translate()` escapes the caret ('^') as a
-          # literal. To maintain consistency with undefined behavior,
-          # I am escaping the '^' as well.
-          elsif pattern[i].chr == '^'
-            expr += '\\^'
-            i += 1
-          end
-          # Escape brackets contained within pattern
-          if pattern[i].chr == ']' && i != j
-            expr += '\]'
-            i += 1
+      "''
+      # NOTE: This is derived from `fnmatch.translate()` and is similar to
+      # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
+      escape = false
+      regex = ''
+      i = 0
+      while i < pattern.size
+        # Get next character.
+        char = pattern[i].chr
+        i += 1
+        # Escape the character.
+        if escape
+          escape = false
+          regex += Regexp.escape(char)
+          # Escape character, escape next character.
+        elsif char == '\\'
+          escape = true
+          # Multi-character wildcard. Match any string (except slashes),
+          # including an empty string.
+        elsif char == '*'
+          regex += '[^/]*'
+          # Single-character wildcard. Match any single character (except
+          # a slash).
+        elsif char == '?'
+          regex += '[^/]'
+          # Bracket expression wildcard. Except for the beginning
+          # exclamation mark, the whole bracket expression can be used
+          # directly as regex but we have to find where the expression
+          # ends.
+          # - "[][!]" matches ']', '[' and '!'.
+          # - "[]-]" matches ']' and '-'.
+          # - "[!]a-]" matches any character except ']', 'a' and '-'.
+        elsif char == '['
+          j = i
+          # Skip past the bracket expression negation.
+          j += 1 if j < pattern.size && pattern[j].chr == '!'
+          # Skip past the first closing bracket if it is at the beginning
+          # of the expression.
+          j += 1 if j < pattern.size && pattern[j].chr == ']'
+          # Find closing bracket. Stop once we reach the end or find it.
+          j += 1 while j < pattern.size && pattern[j].chr != ']'
+          if j < pattern.size
+            expr = '['
+            # Bracket expression needs to be negated.
+            case pattern[i].chr
+            when '!'
+              expr += '^'
+              i += 1
+              # POSIX declares that the regex bracket expression negation
+              # "[^...]" is undefined in a glob pattern. Python's
+              # `fnmatch.translate()` escapes the caret ('^') as a
+              # literal. To maintain consistency with undefined behavior,
+              # I am escaping the '^' as well.
+            when '^'
+              expr += '\\^'
+              i += 1
+            end
+            # Escape brackets contained within pattern
+            if pattern[i].chr == ']' && i != j
+              expr += '\]'
+              i += 1
+            end
+            # Build regex bracket expression. Escape backslashes so they are
+            # treated as literal backslashes by regex as defined by POSIX.
+            expr += pattern[i..j].sub('\\', '\\\\')
+            # Add regex braket expression to regex result.
+            regex += expr
+            # Found end of bracket expression. Increment j to be one past
+            # the closing bracket:
+            #
+            #  [...]
+            #   ^   ^
+            #   i   j
+            #
+            j += 1
+            # Set i to one past the closing bracket.
+            i = j
+            # Failed to find closing bracket, treat opening bracket as a
+            # bracket literal instead of as an expression.
+          else
+            regex += '\['
           end
-          # Build regex braket expression. Escape slashes so they are
-          # treated as literal slashes by regex as defined by POSIX.
-          expr += pattern[i..j].sub('\\', '\\\\')
-          # Add regex braket expression to regex result.
-          regex += expr
-          # Found end of braket expression. Increment j to be one past
-          # the closing braket:
-          #
-          #  [...]
-          #   ^   ^
-          #   i   j
-          #
-          j += 1
-          # Set i to one past the closing braket.
-          i = j
-        # Failed to find closing braket, treat opening braket as a
-        # braket literal instead of as an expression.
+          # Regular character, escape it for regex.
         else
-          regex += '\['
+          regex << Regexp.escape(char)
         end
-      # Regular character, escape it for regex.
-      else
-        regex << Regexp.escape(char)
       end
-    end
-    regex
-  end
+      regex
+    end
-  def inclusive?
-    @inclusive
+    def inclusive?
+      @inclusive
+    end
   end
 end