pathspec 0.0.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,151 +1,159 @@
1
- # encoding: utf-8
2
-
3
1
  require 'pathspec/regexspec'
4
2
 
5
- class GitIgnoreSpec < RegexSpec
6
- attr_reader :regex
7
-
8
- def initialize(pattern)
9
- pattern = pattern.strip unless pattern.nil?
10
-
11
- # A pattern starting with a hash ('#') serves as a comment
12
- # (neither includes nor excludes files). Escape the hash with a
13
- # back-slash to match a literal hash (i.e., '\#').
14
- if pattern.start_with?('#')
15
- @regex = nil
16
- @inclusive = nil
17
-
18
- # A blank pattern is a null-operation (neither includes nor
19
- # excludes files).
20
- elsif pattern.empty?
21
- @regex = nil
22
- @inclusive = nil
23
-
24
- # Patterns containing three or more consecutive stars are invalid and
25
- # will be ignored.
26
- elsif pattern =~ /\*\*\*+/
27
- @regex = nil
28
- @inclusive = nil
29
-
30
- # We have a valid pattern!
31
- else
32
- # A pattern starting with an exclamation mark ('!') negates the
33
- # pattern (exclude instead of include). Escape the exclamation
34
- # mark with a back-slash to match a literal exclamation mark
35
- # (i.e., '\!').
36
- if pattern.start_with?('!')
37
- @inclusive = false
38
- # Remove leading exclamation mark.
39
- pattern = pattern[1..-1]
3
+ class PathSpec
4
+ # Class for parsing a .gitignore spec
5
+ class GitIgnoreSpec < RegexSpec
6
+ attr_reader :regex, :pattern
7
+
8
+ def initialize(original_pattern) # rubocop:disable Metrics/CyclomaticComplexity
9
+ pattern = original_pattern.strip unless original_pattern.nil?
10
+
11
+ # A pattern starting with a hash ('#') serves as a comment
12
+ # (neither includes nor excludes files). Escape the hash with a
13
+ # back-slash to match a literal hash (i.e., '\#').
14
+ if pattern.start_with?('#')
15
+ @regex = nil
16
+ @inclusive = nil
17
+
18
+ # A blank pattern is a null-operation (neither includes nor
19
+ # excludes files).
20
+ elsif pattern.empty? # rubocop:disable Lint/DuplicateBranch
21
+ @regex = nil
22
+ @inclusive = nil
23
+
24
+ # Patterns containing three or more consecutive stars are invalid and
25
+ # will be ignored.
26
+ elsif /\*\*\*+/.match?(pattern) # rubocop:disable Lint/DuplicateBranch
27
+ @regex = nil
28
+ @inclusive = nil
29
+
30
+ # EDGE CASE: According to git check-ignore (v2.4.1), a single '/'
31
+ # does not match any file
32
+ elsif pattern == '/' # rubocop:disable Lint/DuplicateBranch
33
+ @regex = nil
34
+ @inclusive = nil
35
+
36
+ # We have a valid pattern!
40
37
  else
41
- @inclusive = true
42
- end
43
-
44
- # Remove leading back-slash escape for escaped hash ('#') or
45
- # exclamation mark ('!').
46
- if pattern.start_with?('\\')
47
- pattern = pattern[1..-1]
48
- end
49
-
50
- # Split pattern into segments. -1 to allow trailing slashes.
51
- pattern_segs = pattern.split('/', -1)
52
-
53
- # Normalize pattern to make processing easier.
54
-
55
- # A pattern beginning with a slash ('/') will only match paths
56
- # directly on the root directory instead of any descendant
57
- # paths. So, remove empty first segment to make pattern relative
58
- # to root.
59
- if pattern_segs[0].empty?
60
- pattern_segs.shift
61
- else
62
- # A pattern without a beginning slash ('/') will match any
63
- # descendant path. This is equivilent to "**/{pattern}". So,
64
- # prepend with double-asterisks to make pattern relative to
65
- # root.
66
- if pattern_segs.length == 1 && pattern_segs[0] != '**'
67
- pattern_segs.insert(0, '**')
38
+ # A pattern starting with an exclamation mark ('!') negates the
39
+ # pattern (exclude instead of include). Escape the exclamation
40
+ # mark with a back-slash to match a literal exclamation mark
41
+ # (i.e., '\!').
42
+ if pattern.start_with?('!')
43
+ @inclusive = false
44
+ # Remove leading exclamation mark.
45
+ pattern = pattern[1..]
46
+ else
47
+ @inclusive = true
68
48
  end
69
- end
70
49
 
71
- # A pattern ending with a slash ('/') will match all descendant
72
- # paths of if it is a directory but not if it is a regular file.
73
- # This is equivilent to "{pattern}/**". So, set last segment to
74
- # double asterisks to include all descendants.
75
- if pattern_segs[-1].empty?
76
- pattern_segs[-1] = '**'
77
- end
78
-
79
- # Handle platforms with backslash separated paths
80
- if File::SEPARATOR == '\\'
81
- path_sep = '\\\\'
82
- else
83
- path_sep = '/'
84
- end
50
+ # Remove leading back-slash escape for escaped hash ('#') or
51
+ # exclamation mark ('!').
52
+ pattern = pattern[1..] if pattern.start_with?('\\')
53
+
54
+ # Split pattern into segments. -1 to allow trailing slashes.
55
+ pattern_segs = pattern.split('/', -1)
56
+
57
+ # Normalize pattern to make processing easier.
58
+
59
+ # A pattern beginning with a slash ('/') will only match paths
60
+ # directly on the root directory instead of any descendant
61
+ # paths. So, remove empty first segment to make pattern relative
62
+ # to root.
63
+ if pattern_segs[0].empty?
64
+ pattern_segs.shift
65
+ elsif pattern_segs.length == 1 ||
66
+ pattern_segs.length == 2 && pattern_segs[-1].empty?
67
+ # A pattern without a beginning slash ('/') will match any
68
+ # descendant path. This is equivalent to "**/{pattern}". So,
69
+ # prepend with double-asterisks to make pattern relative to
70
+ # root.
71
+ # EDGE CASE: This also holds for a single pattern with a
72
+ # trailing slash (e.g. dir/).
73
+ pattern_segs.insert(0, '**') if pattern_segs[0] != '**'
74
+ end
85
75
 
76
+ # A pattern ending with a slash ('/') will match all descendant
77
+ # paths if it is a directory but not if it is a regular file.
78
+ # This is equivalent to "{pattern}/**". So, set last segment to
79
+ # double asterisks to include all descendants.
80
+ pattern_segs[-1] = '**' if pattern_segs[-1].empty? && pattern_segs.length > 1
81
+
82
+ # Handle platforms with backslash separated paths
83
+ path_sep = if File::SEPARATOR == '\\'
84
+ '\\\\'
85
+ else
86
+ '/'
87
+ end
88
+
89
+ # Build regular expression from pattern.
90
+ regex = '^'
91
+ need_slash = false
92
+ regex_end = pattern_segs.size - 1
93
+ pattern_segs.each_index do |i|
94
+ seg = pattern_segs[i]
95
+
96
+ case seg
97
+ when '**'
98
+ # A pattern consisting solely of double-asterisks ('**')
99
+ # will match every path.
100
+ if i == 0 && i == regex_end
101
+ regex.concat('.+')
102
+
103
+ # A normalized pattern beginning with double-asterisks
104
+ # ('**') will match any leading path segments.
105
+ elsif i == 0
106
+ regex.concat("(?:.+#{path_sep})?")
107
+ need_slash = false
108
+
109
+ # A normalized pattern ending with double-asterisks ('**')
110
+ # will match any trailing path segments.
111
+ elsif i == regex_end
112
+ regex.concat("#{path_sep}.*")
113
+
114
+ # A pattern with inner double-asterisks ('**') will match
115
+ # multiple (or zero) inner path segments.
116
+ else
117
+ regex.concat("(?:#{path_sep}.+)?")
118
+ need_slash = true
119
+ end
120
+
121
+ # Match single path segment.
122
+ when '*'
123
+ regex.concat(path_sep) if need_slash
124
+
125
+ regex.concat("[^#{path_sep}]+")
126
+ need_slash = true
86
127
 
87
- # Build regular expression from pattern.
88
- regex = '^'
89
- need_slash = false
90
- regex_end = pattern_segs.size - 1
91
- pattern_segs.each_index do |i|
92
- seg = pattern_segs[i]
93
-
94
- if seg == '**'
95
- # A pattern consisting solely of double-asterisks ('**')
96
- # will match every path.
97
- if i == 0 && i == regex_end
98
- regex.concat('.+')
99
-
100
- # A normalized pattern beginning with double-asterisks
101
- # ('**') will match any leading path segments.
102
- elsif i == 0
103
- regex.concat("(?:.+#{path_sep})?")
104
- need_slash = false
105
-
106
- # A normalized pattern ending with double-asterisks ('**')
107
- # will match any trailing path segments.
108
- elsif i == regex_end
109
- regex.concat("#{path_sep}.*")
110
-
111
- # A pattern with inner double-asterisks ('**') will match
112
- # multiple (or zero) inner path segments.
113
128
  else
114
- regex.concat("(?:#{path_sep}.+)?")
115
- need_slash = true
116
- end
129
+ # Match segment glob pattern.
130
+ regex.concat(path_sep) if need_slash
117
131
 
118
- # Match single path segment.
119
- elsif seg == '*'
120
- if need_slash
121
- regex.concat(path_sep)
122
- end
132
+ regex.concat(translate_segment_glob(seg))
123
133
 
124
- regex.concat("[^#{path_sep}]+")
125
- need_slash = true
134
+ if i == regex_end && @inclusive
135
+ # A pattern ending without a slash ('/') will match a file
136
+ # or a directory (with paths underneath it).
137
+ # e.g. foo matches: foo, foo/bar, foo/bar/baz, etc.
138
+ # EDGE CASE: However, this does not hold for exclusion cases
139
+ # according to `git check-ignore` (v2.4.1).
140
+ regex.concat("(?:#{path_sep}.*)?")
141
+ end
126
142
 
127
- else
128
- # Match segment glob pattern.
129
- if need_slash
130
- regex.concat(path_sep)
143
+ need_slash = true
131
144
  end
132
-
133
- regex.concat(translate_segment_glob(seg))
134
- need_slash = true
135
145
  end
136
- end
137
146
 
138
- regex.concat('$')
139
- super(regex)
140
- end
141
- end
147
+ regex.concat('$')
148
+ super(regex)
142
149
 
143
- def match(path)
144
- super(path)
145
- end
150
+ # Copy original pattern
151
+ @pattern = original_pattern.dup
152
+ end
153
+ end
146
154
 
147
- def translate_segment_glob(pattern)
148
- """
155
+ def translate_segment_glob(pattern)
156
+ ''"
149
157
  Translates the glob pattern to a regular expression. This is used in
150
158
  the constructor to translate a path segment glob pattern to its
151
159
  corresponding regular expression.
@@ -153,123 +161,117 @@ class GitIgnoreSpec < RegexSpec
153
161
  *pattern* (``str``) is the glob pattern.
154
162
 
155
163
  Returns the regular expression (``str``).
156
- """
157
- # NOTE: This is derived from `fnmatch.translate()` and is similar to
158
- # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
159
-
160
- escape = false
161
- regex = ''
162
- i = 0
163
-
164
- while i < pattern.size
165
- # Get next character.
166
- char = pattern[i].chr
167
- i += 1
168
-
169
- # Escape the character.
170
- if escape
171
- escape = false
172
- regex += Regexp.escape(char)
173
-
174
- # Escape character, escape next character.
175
- elsif char == '\\'
176
- escape = true
177
-
178
- # Multi-character wildcard. Match any string (except slashes),
179
- # including an empty string.
180
- elsif char == '*'
181
- regex += '[^/]*'
182
-
183
- # Single-character wildcard. Match any single character (except
184
- # a slash).
185
- elsif char == '?'
186
- regex += '[^/]'
187
-
188
- # Braket expression wildcard. Except for the beginning
189
- # exclamation mark, the whole braket expression can be used
190
- # directly as regex but we have to find where the expression
191
- # ends.
192
- # - "[][!]" matchs ']', '[' and '!'.
193
- # - "[]-]" matchs ']' and '-'.
194
- # - "[!]a-]" matchs any character except ']', 'a' and '-'.
195
- elsif char == '['
196
- j = i
197
- # Pass brack expression negation.
198
- if j < pattern.size && pattern[j].chr == '!'
199
- j += 1
200
- end
201
-
202
- # Pass first closing braket if it is at the beginning of the
203
- # expression.
204
- if j < pattern.size && pattern[j].chr == ']'
205
- j += 1
206
- end
207
-
208
- # Find closing braket. Stop once we reach the end or find it.
209
- while j < pattern.size && pattern[j].chr != ']'
210
- j += 1
211
- end
212
-
213
-
214
- if j < pattern.size
215
- expr = '['
216
-
217
- # Braket expression needs to be negated.
218
- if pattern[i].chr == '!'
219
- expr += '^'
220
- i += 1
221
-
222
- # POSIX declares that the regex braket expression negation
223
- # "[^...]" is undefined in a glob pattern. Python's
224
- # `fnmatch.translate()` escapes the caret ('^') as a
225
- # literal. To maintain consistency with undefined behavior,
226
- # I am escaping the '^' as well.
227
- elsif pattern[i].chr == '^'
228
- expr += '\\^'
229
- i += 1
230
- end
231
-
232
- # Escape brackets contained within pattern
233
- if pattern[i].chr == ']' && i != j
234
- expr += '\]'
235
- i += 1
164
+ "''
165
+ # NOTE: This is derived from `fnmatch.translate()` and is similar to
166
+ # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
167
+
168
+ escape = false
169
+ regex = ''
170
+ i = 0
171
+
172
+ while i < pattern.size
173
+ # Get next character.
174
+ char = pattern[i].chr
175
+ i += 1
176
+
177
+ # Escape the character.
178
+ if escape
179
+ escape = false
180
+ regex += Regexp.escape(char)
181
+
182
+ # Escape character, escape next character.
183
+ elsif char == '\\'
184
+ escape = true
185
+
186
+ # Multi-character wildcard. Match any string (except slashes),
187
+ # including an empty string.
188
+ elsif char == '*'
189
+ regex += '[^/]*'
190
+
191
+ # Single-character wildcard. Match any single character (except
192
+ # a slash).
193
+ elsif char == '?'
194
+ regex += '[^/]'
195
+
196
+ # Bracket expression wildcard. Except for the beginning
197
+ # exclamation mark, the whole bracket expression can be used
198
+ # directly as regex but we have to find where the expression
199
+ # ends.
200
+ # - "[][!]" matches ']', '[' and '!'.
201
+ # - "[]-]" matches ']' and '-'.
202
+ # - "[!]a-]" matches any character except ']', 'a' and '-'.
203
+ elsif char == '['
204
+ j = i
205
+ # Skip past the bracket expression negation.
206
+ j += 1 if j < pattern.size && pattern[j].chr == '!'
207
+
208
+ # Skip the first closing bracket if it is at the beginning of the
209
+ # expression.
210
+ j += 1 if j < pattern.size && pattern[j].chr == ']'
211
+
212
+ # Find closing bracket. Stop once we reach the end or find it.
213
+ j += 1 while j < pattern.size && pattern[j].chr != ']'
214
+
215
+ if j < pattern.size
216
+ expr = '['
217
+
218
+ # Bracket expression needs to be negated.
219
+ case pattern[i].chr
220
+ when '!'
221
+ expr += '^'
222
+ i += 1
223
+
224
+ # POSIX declares that the regex bracket expression negation
225
+ # "[^...]" is undefined in a glob pattern. Python's
226
+ # `fnmatch.translate()` escapes the caret ('^') as a
227
+ # literal. To maintain consistency with undefined behavior,
228
+ # I am escaping the '^' as well.
229
+ when '^'
230
+ expr += '\\^'
231
+ i += 1
232
+ end
233
+
234
+ # Escape brackets contained within pattern
235
+ if pattern[i].chr == ']' && i != j
236
+ expr += '\]'
237
+ i += 1
238
+ end
239
+
240
+ # Build regex bracket expression. Escape slashes so they are
241
+ # treated as literal slashes by regex as defined by POSIX.
242
+ expr += pattern[i..j].sub('\\', '\\\\')
243
+
244
+ # Add regex bracket expression to regex result.
245
+ regex += expr
246
+
247
+ # Found end of bracket expression. Increment j to be one past
248
+ # the closing bracket:
249
+ #
250
+ # [...]
251
+ # ^ ^
252
+ # i j
253
+ #
254
+ j += 1
255
+ # Set i to one past the closing bracket.
256
+ i = j
257
+
258
+ # Failed to find closing bracket, treat opening bracket as a
259
+ # bracket literal instead of as an expression.
260
+ else
261
+ regex += '\['
236
262
  end
237
263
 
238
-
239
- # Build regex braket expression. Escape slashes so they are
240
- # treated as literal slashes by regex as defined by POSIX.
241
- expr += pattern[i..j].sub('\\', '\\\\')
242
-
243
- # Add regex braket expression to regex result.
244
- regex += expr
245
-
246
- # Found end of braket expression. Increment j to be one past
247
- # the closing braket:
248
- #
249
- # [...]
250
- # ^ ^
251
- # i j
252
- #
253
- j += 1
254
- # Set i to one past the closing braket.
255
- i = j
256
-
257
- # Failed to find closing braket, treat opening braket as a
258
- # braket literal instead of as an expression.
264
+ # Regular character, escape it for regex.
259
265
  else
260
- regex += '\['
266
+ regex << Regexp.escape(char)
261
267
  end
262
-
263
- # Regular character, escape it for regex.
264
- else
265
- regex << Regexp.escape(char)
266
268
  end
267
- end
268
269
 
269
- regex
270
- end
270
+ regex
271
+ end
271
272
 
272
- def inclusive?
273
- @inclusive
273
+ def inclusive?
274
+ @inclusive
275
+ end
274
276
  end
275
277
  end