pathspec 0.0.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,151 +1,159 @@
1
- # encoding: utf-8
2
-
3
1
  require 'pathspec/regexspec'
4
2
 
5
- class GitIgnoreSpec < RegexSpec
6
- attr_reader :regex
7
-
8
- def initialize(pattern)
9
- pattern = pattern.strip unless pattern.nil?
10
-
11
- # A pattern starting with a hash ('#') serves as a comment
12
- # (neither includes nor excludes files). Escape the hash with a
13
- # back-slash to match a literal hash (i.e., '\#').
14
- if pattern.start_with?('#')
15
- @regex = nil
16
- @inclusive = nil
17
-
18
- # A blank pattern is a null-operation (neither includes nor
19
- # excludes files).
20
- elsif pattern.empty?
21
- @regex = nil
22
- @inclusive = nil
23
-
24
- # Patterns containing three or more consecutive stars are invalid and
25
- # will be ignored.
26
- elsif pattern =~ /\*\*\*+/
27
- @regex = nil
28
- @inclusive = nil
29
-
30
- # We have a valid pattern!
31
- else
32
- # A pattern starting with an exclamation mark ('!') negates the
33
- # pattern (exclude instead of include). Escape the exclamation
34
- # mark with a back-slash to match a literal exclamation mark
35
- # (i.e., '\!').
36
- if pattern.start_with?('!')
37
- @inclusive = false
38
- # Remove leading exclamation mark.
39
- pattern = pattern[1..-1]
3
+ class PathSpec
4
+ # Class for parsing a .gitignore spec
5
+ class GitIgnoreSpec < RegexSpec
6
+ attr_reader :regex, :pattern
7
+
8
+ def initialize(original_pattern) # rubocop:disable Metrics/CyclomaticComplexity
9
+ pattern = original_pattern.strip unless original_pattern.nil?
10
+
11
+ # A pattern starting with a hash ('#') serves as a comment
12
+ # (neither includes nor excludes files). Escape the hash with a
13
+ # back-slash to match a literal hash (i.e., '\#').
14
+ if pattern.start_with?('#')
15
+ @regex = nil
16
+ @inclusive = nil
17
+
18
+ # A blank pattern is a null-operation (neither includes nor
19
+ # excludes files).
20
+ elsif pattern.empty? # rubocop:disable Lint/DuplicateBranch
21
+ @regex = nil
22
+ @inclusive = nil
23
+
24
+ # Patterns containing three or more consecutive stars are invalid and
25
+ # will be ignored.
26
+ elsif /\*\*\*+/.match?(pattern) # rubocop:disable Lint/DuplicateBranch
27
+ @regex = nil
28
+ @inclusive = nil
29
+
30
+ # EDGE CASE: According to git check-ignore (v2.4.1), a single '/'
31
+ # does not match any file
32
+ elsif pattern == '/' # rubocop:disable Lint/DuplicateBranch
33
+ @regex = nil
34
+ @inclusive = nil
35
+
36
+ # We have a valid pattern!
40
37
  else
41
- @inclusive = true
42
- end
43
-
44
- # Remove leading back-slash escape for escaped hash ('#') or
45
- # exclamation mark ('!').
46
- if pattern.start_with?('\\')
47
- pattern = pattern[1..-1]
48
- end
49
-
50
- # Split pattern into segments. -1 to allow trailing slashes.
51
- pattern_segs = pattern.split('/', -1)
52
-
53
- # Normalize pattern to make processing easier.
54
-
55
- # A pattern beginning with a slash ('/') will only match paths
56
- # directly on the root directory instead of any descendant
57
- # paths. So, remove empty first segment to make pattern relative
58
- # to root.
59
- if pattern_segs[0].empty?
60
- pattern_segs.shift
61
- else
62
- # A pattern without a beginning slash ('/') will match any
63
- # descendant path. This is equivilent to "**/{pattern}". So,
64
- # prepend with double-asterisks to make pattern relative to
65
- # root.
66
- if pattern_segs.length == 1 && pattern_segs[0] != '**'
67
- pattern_segs.insert(0, '**')
38
+ # A pattern starting with an exclamation mark ('!') negates the
39
+ # pattern (exclude instead of include). Escape the exclamation
40
+ # mark with a back-slash to match a literal exclamation mark
41
+ # (i.e., '\!').
42
+ if pattern.start_with?('!')
43
+ @inclusive = false
44
+ # Remove leading exclamation mark.
45
+ pattern = pattern[1..]
46
+ else
47
+ @inclusive = true
68
48
  end
69
- end
70
49
 
71
- # A pattern ending with a slash ('/') will match all descendant
72
- # paths of if it is a directory but not if it is a regular file.
73
- # This is equivilent to "{pattern}/**". So, set last segment to
74
- # double asterisks to include all descendants.
75
- if pattern_segs[-1].empty?
76
- pattern_segs[-1] = '**'
77
- end
78
-
79
- # Handle platforms with backslash separated paths
80
- if File::SEPARATOR == '\\'
81
- path_sep = '\\\\'
82
- else
83
- path_sep = '/'
84
- end
50
+ # Remove leading back-slash escape for escaped hash ('#') or
51
+ # exclamation mark ('!').
52
+ pattern = pattern[1..] if pattern.start_with?('\\')
53
+
54
+ # Split pattern into segments. -1 to allow trailing slashes.
55
+ pattern_segs = pattern.split('/', -1)
56
+
57
+ # Normalize pattern to make processing easier.
58
+
59
+ # A pattern beginning with a slash ('/') will only match paths
60
+ # directly on the root directory instead of any descendant
61
+ # paths. So, remove empty first segment to make pattern relative
62
+ # to root.
63
+ if pattern_segs[0].empty?
64
+ pattern_segs.shift
65
+ elsif pattern_segs.length == 1 ||
66
+ pattern_segs.length == 2 && pattern_segs[-1].empty?
67
+ # A pattern without a beginning slash ('/') will match any
68
+ # descendant path. This is equivalent to "**/{pattern}". So,
69
+ # prepend with double-asterisks to make pattern relative to
70
+ # root.
71
+ # EDGE CASE: This also holds for a single pattern with a
72
+ # trailing slash (e.g. dir/).
73
+ pattern_segs.insert(0, '**') if pattern_segs[0] != '**'
74
+ end
85
75
 
76
+ # A pattern ending with a slash ('/') will match all descendant
77
+ # paths if it is a directory but not if it is a regular file.
78
+ # This is equivalent to "{pattern}/**". So, set last segment to
79
+ # double asterisks to include all descendants.
80
+ pattern_segs[-1] = '**' if pattern_segs[-1].empty? && pattern_segs.length > 1
81
+
82
+ # Handle platforms with backslash separated paths
83
+ path_sep = if File::SEPARATOR == '\\'
84
+ '\\\\'
85
+ else
86
+ '/'
87
+ end
88
+
89
+ # Build regular expression from pattern.
90
+ regex = '^'
91
+ need_slash = false
92
+ regex_end = pattern_segs.size - 1
93
+ pattern_segs.each_index do |i|
94
+ seg = pattern_segs[i]
95
+
96
+ case seg
97
+ when '**'
98
+ # A pattern consisting solely of double-asterisks ('**')
99
+ # will match every path.
100
+ if i == 0 && i == regex_end
101
+ regex.concat('.+')
102
+
103
+ # A normalized pattern beginning with double-asterisks
104
+ # ('**') will match any leading path segments.
105
+ elsif i == 0
106
+ regex.concat("(?:.+#{path_sep})?")
107
+ need_slash = false
108
+
109
+ # A normalized pattern ending with double-asterisks ('**')
110
+ # will match any trailing path segments.
111
+ elsif i == regex_end
112
+ regex.concat("#{path_sep}.*")
113
+
114
+ # A pattern with inner double-asterisks ('**') will match
115
+ # multiple (or zero) inner path segments.
116
+ else
117
+ regex.concat("(?:#{path_sep}.+)?")
118
+ need_slash = true
119
+ end
120
+
121
+ # Match single path segment.
122
+ when '*'
123
+ regex.concat(path_sep) if need_slash
124
+
125
+ regex.concat("[^#{path_sep}]+")
126
+ need_slash = true
86
127
 
87
- # Build regular expression from pattern.
88
- regex = '^'
89
- need_slash = false
90
- regex_end = pattern_segs.size - 1
91
- pattern_segs.each_index do |i|
92
- seg = pattern_segs[i]
93
-
94
- if seg == '**'
95
- # A pattern consisting solely of double-asterisks ('**')
96
- # will match every path.
97
- if i == 0 && i == regex_end
98
- regex.concat('.+')
99
-
100
- # A normalized pattern beginning with double-asterisks
101
- # ('**') will match any leading path segments.
102
- elsif i == 0
103
- regex.concat("(?:.+#{path_sep})?")
104
- need_slash = false
105
-
106
- # A normalized pattern ending with double-asterisks ('**')
107
- # will match any trailing path segments.
108
- elsif i == regex_end
109
- regex.concat("#{path_sep}.*")
110
-
111
- # A pattern with inner double-asterisks ('**') will match
112
- # multiple (or zero) inner path segments.
113
128
  else
114
- regex.concat("(?:#{path_sep}.+)?")
115
- need_slash = true
116
- end
129
+ # Match segment glob pattern.
130
+ regex.concat(path_sep) if need_slash
117
131
 
118
- # Match single path segment.
119
- elsif seg == '*'
120
- if need_slash
121
- regex.concat(path_sep)
122
- end
132
+ regex.concat(translate_segment_glob(seg))
123
133
 
124
- regex.concat("[^#{path_sep}]+")
125
- need_slash = true
134
+ if i == regex_end && @inclusive
135
+ # A pattern ending without a slash ('/') will match a file
136
+ # or a directory (with paths underneath it).
137
+ # e.g. foo matches: foo, foo/bar, foo/bar/baz, etc.
138
+ # EDGE CASE: However, this does not hold for exclusion cases
139
+ # according to `git check-ignore` (v2.4.1).
140
+ regex.concat("(?:#{path_sep}.*)?")
141
+ end
126
142
 
127
- else
128
- # Match segment glob pattern.
129
- if need_slash
130
- regex.concat(path_sep)
143
+ need_slash = true
131
144
  end
132
-
133
- regex.concat(translate_segment_glob(seg))
134
- need_slash = true
135
145
  end
136
- end
137
146
 
138
- regex.concat('$')
139
- super(regex)
140
- end
141
- end
147
+ regex.concat('$')
148
+ super(regex)
142
149
 
143
- def match(path)
144
- super(path)
145
- end
150
+ # Copy original pattern
151
+ @pattern = original_pattern.dup
152
+ end
153
+ end
146
154
 
147
- def translate_segment_glob(pattern)
148
- """
155
+ def translate_segment_glob(pattern)
156
+ ''"
149
157
  Translates the glob pattern to a regular expression. This is used in
150
158
  the constructor to translate a path segment glob pattern to its
151
159
  corresponding regular expression.
@@ -153,123 +161,117 @@ class GitIgnoreSpec < RegexSpec
153
161
  *pattern* (``str``) is the glob pattern.
154
162
 
155
163
  Returns the regular expression (``str``).
156
- """
157
- # NOTE: This is derived from `fnmatch.translate()` and is similar to
158
- # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
159
-
160
- escape = false
161
- regex = ''
162
- i = 0
163
-
164
- while i < pattern.size
165
- # Get next character.
166
- char = pattern[i].chr
167
- i += 1
168
-
169
- # Escape the character.
170
- if escape
171
- escape = false
172
- regex += Regexp.escape(char)
173
-
174
- # Escape character, escape next character.
175
- elsif char == '\\'
176
- escape = true
177
-
178
- # Multi-character wildcard. Match any string (except slashes),
179
- # including an empty string.
180
- elsif char == '*'
181
- regex += '[^/]*'
182
-
183
- # Single-character wildcard. Match any single character (except
184
- # a slash).
185
- elsif char == '?'
186
- regex += '[^/]'
187
-
188
- # Braket expression wildcard. Except for the beginning
189
- # exclamation mark, the whole braket expression can be used
190
- # directly as regex but we have to find where the expression
191
- # ends.
192
- # - "[][!]" matchs ']', '[' and '!'.
193
- # - "[]-]" matchs ']' and '-'.
194
- # - "[!]a-]" matchs any character except ']', 'a' and '-'.
195
- elsif char == '['
196
- j = i
197
- # Pass brack expression negation.
198
- if j < pattern.size && pattern[j].chr == '!'
199
- j += 1
200
- end
201
-
202
- # Pass first closing braket if it is at the beginning of the
203
- # expression.
204
- if j < pattern.size && pattern[j].chr == ']'
205
- j += 1
206
- end
207
-
208
- # Find closing braket. Stop once we reach the end or find it.
209
- while j < pattern.size && pattern[j].chr != ']'
210
- j += 1
211
- end
212
-
213
-
214
- if j < pattern.size
215
- expr = '['
216
-
217
- # Braket expression needs to be negated.
218
- if pattern[i].chr == '!'
219
- expr += '^'
220
- i += 1
221
-
222
- # POSIX declares that the regex braket expression negation
223
- # "[^...]" is undefined in a glob pattern. Python's
224
- # `fnmatch.translate()` escapes the caret ('^') as a
225
- # literal. To maintain consistency with undefined behavior,
226
- # I am escaping the '^' as well.
227
- elsif pattern[i].chr == '^'
228
- expr += '\\^'
229
- i += 1
230
- end
231
-
232
- # Escape brackets contained within pattern
233
- if pattern[i].chr == ']' && i != j
234
- expr += '\]'
235
- i += 1
164
+ "''
165
+ # NOTE: This is derived from `fnmatch.translate()` and is similar to
166
+ # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
167
+
168
+ escape = false
169
+ regex = ''
170
+ i = 0
171
+
172
+ while i < pattern.size
173
+ # Get next character.
174
+ char = pattern[i].chr
175
+ i += 1
176
+
177
+ # Escape the character.
178
+ if escape
179
+ escape = false
180
+ regex += Regexp.escape(char)
181
+
182
+ # Escape character, escape next character.
183
+ elsif char == '\\'
184
+ escape = true
185
+
186
+ # Multi-character wildcard. Match any string (except slashes),
187
+ # including an empty string.
188
+ elsif char == '*'
189
+ regex += '[^/]*'
190
+
191
+ # Single-character wildcard. Match any single character (except
192
+ # a slash).
193
+ elsif char == '?'
194
+ regex += '[^/]'
195
+
196
+ # Bracket expression wildcard. Except for the beginning
197
+ # exclamation mark, the whole bracket expression can be used
198
+ # directly as regex but we have to find where the expression
199
+ # ends.
200
+ # - "[][!]" matches ']', '[' and '!'.
201
+ # - "[]-]" matches ']' and '-'.
202
+ # - "[!]a-]" matches any character except ']', 'a' and '-'.
203
+ elsif char == '['
204
+ j = i
205
+ # Pass bracket expression negation.
206
+ j += 1 if j < pattern.size && pattern[j].chr == '!'
207
+
208
+ # Pass first closing bracket if it is at the beginning of the
209
+ # expression.
210
+ j += 1 if j < pattern.size && pattern[j].chr == ']'
211
+
212
+ # Find closing bracket. Stop once we reach the end or find it.
213
+ j += 1 while j < pattern.size && pattern[j].chr != ']'
214
+
215
+ if j < pattern.size
216
+ expr = '['
217
+
218
+ # Bracket expression needs to be negated.
219
+ case pattern[i].chr
220
+ when '!'
221
+ expr += '^'
222
+ i += 1
223
+
224
+ # POSIX declares that the regex bracket expression negation
225
+ # "[^...]" is undefined in a glob pattern. Python's
226
+ # `fnmatch.translate()` escapes the caret ('^') as a
227
+ # literal. To maintain consistency with undefined behavior,
228
+ # I am escaping the '^' as well.
229
+ when '^'
230
+ expr += '\\^'
231
+ i += 1
232
+ end
233
+
234
+ # Escape brackets contained within pattern
235
+ if pattern[i].chr == ']' && i != j
236
+ expr += '\]'
237
+ i += 1
238
+ end
239
+
240
+ # Build regex bracket expression. Escape slashes so they are
241
+ # treated as literal slashes by regex as defined by POSIX.
242
+ expr += pattern[i..j].sub('\\', '\\\\')
243
+
244
+ # Add regex bracket expression to regex result.
245
+ regex += expr
246
+
247
+ # Found end of bracket expression. Increment j to be one past
248
+ # the closing bracket:
249
+ #
250
+ # [...]
251
+ # ^ ^
252
+ # i j
253
+ #
254
+ j += 1
255
+ # Set i to one past the closing bracket.
256
+ i = j
257
+
258
+ # Failed to find closing bracket, treat opening bracket as a
259
+ # bracket literal instead of as an expression.
260
+ else
261
+ regex += '\['
236
262
  end
237
263
 
238
-
239
- # Build regex braket expression. Escape slashes so they are
240
- # treated as literal slashes by regex as defined by POSIX.
241
- expr += pattern[i..j].sub('\\', '\\\\')
242
-
243
- # Add regex braket expression to regex result.
244
- regex += expr
245
-
246
- # Found end of braket expression. Increment j to be one past
247
- # the closing braket:
248
- #
249
- # [...]
250
- # ^ ^
251
- # i j
252
- #
253
- j += 1
254
- # Set i to one past the closing braket.
255
- i = j
256
-
257
- # Failed to find closing braket, treat opening braket as a
258
- # braket literal instead of as an expression.
264
+ # Regular character, escape it for regex.
259
265
  else
260
- regex += '\['
266
+ regex << Regexp.escape(char)
261
267
  end
262
-
263
- # Regular character, escape it for regex.
264
- else
265
- regex << Regexp.escape(char)
266
268
  end
267
- end
268
269
 
269
- regex
270
- end
270
+ regex
271
+ end
271
272
 
272
- def inclusive?
273
- @inclusive
273
+ def inclusive?
274
+ @inclusive
275
+ end
274
276
  end
275
277
  end