pathspec 0.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/pathspec/gitignorespec.rb CHANGED
@@ -1,294 +1,276 @@
1
- # encoding: utf-8
2
-
3
1
  require 'pathspec/regexspec'
4
2
 
5
- class GitIgnoreSpec < RegexSpec
6
- attr_reader :regex
7
-
8
- def initialize(pattern)
9
- pattern = pattern.strip unless pattern.nil?
10
-
11
- # A pattern starting with a hash ('#') serves as a comment
12
- # (neither includes nor excludes files). Escape the hash with a
13
- # back-slash to match a literal hash (i.e., '\#').
14
- if pattern.start_with?('#')
15
- @regex = nil
16
- @inclusive = nil
17
-
18
- # A blank pattern is a null-operation (neither includes nor
19
- # excludes files).
20
- elsif pattern.empty?
21
- @regex = nil
22
- @inclusive = nil
23
-
24
- # Patterns containing three or more consecutive stars are invalid and
25
- # will be ignored.
26
- elsif pattern =~ /\*\*\*+/
27
- @regex = nil
28
- @inclusive = nil
29
-
30
- # EDGE CASE: According to git check-ignore (v2.4.1)), a single '/'
31
- # does not match any file
32
- elsif pattern == '/'
33
- @regex = nil
34
- @inclusive = nil
35
-
36
- # We have a valid pattern!
37
- else
38
- # A pattern starting with an exclamation mark ('!') negates the
39
- # pattern (exclude instead of include). Escape the exclamation
40
- # mark with a back-slash to match a literal exclamation mark
41
- # (i.e., '\!').
42
- if pattern.start_with?('!')
43
- @inclusive = false
44
- # Remove leading exclamation mark.
45
- pattern = pattern[1..-1]
3
+ class PathSpec
4
+ # Class for parsing a .gitignore spec
5
+ class GitIgnoreSpec < RegexSpec
6
+ attr_reader :regex, :pattern
7
+
8
+ def initialize(original_pattern) # rubocop:disable Metrics/CyclomaticComplexity
9
+ pattern = original_pattern.strip unless original_pattern.nil?
10
+
11
+ # A pattern starting with a hash ('#') serves as a comment
12
+ # (neither includes nor excludes files). Escape the hash with a
13
+ # back-slash to match a literal hash (i.e., '\#').
14
+ if pattern.start_with?('#')
15
+ @regex = nil
16
+ @inclusive = nil
17
+
18
+ # A blank pattern is a null-operation (neither includes nor
19
+ # excludes files).
20
+ elsif pattern.empty? # rubocop:disable Lint/DuplicateBranch
21
+ @regex = nil
22
+ @inclusive = nil
23
+
24
+ # Patterns containing three or more consecutive stars are invalid and
25
+ # will be ignored.
26
+ elsif /\*\*\*+/.match?(pattern) # rubocop:disable Lint/DuplicateBranch
27
+ @regex = nil
28
+ @inclusive = nil
29
+
30
+ # EDGE CASE: According to git check-ignore (v2.4.1)), a single '/'
31
+ # does not match any file
32
+ elsif pattern == '/' # rubocop:disable Lint/DuplicateBranch
33
+ @regex = nil
34
+ @inclusive = nil
35
+
36
+ # We have a valid pattern!
46
37
  else
47
- @inclusive = true
48
- end
49
-
50
- # Remove leading back-slash escape for escaped hash ('#') or
51
- # exclamation mark ('!').
52
- if pattern.start_with?('\\')
53
- pattern = pattern[1..-1]
54
- end
55
-
56
- # Split pattern into segments. -1 to allow trailing slashes.
57
- pattern_segs = pattern.split('/', -1)
58
-
59
- # Normalize pattern to make processing easier.
60
-
61
- # A pattern beginning with a slash ('/') will only match paths
62
- # directly on the root directory instead of any descendant
63
- # paths. So, remove empty first segment to make pattern relative
64
- # to root.
65
- if pattern_segs[0].empty?
66
- pattern_segs.shift
67
- elsif pattern_segs.length == 1 ||
68
- pattern_segs.length == 2 && pattern_segs[-1].empty?
69
- # A pattern without a beginning slash ('/') will match any
70
- # descendant path. This is equivilent to "**/{pattern}". So,
71
- # prepend with double-asterisks to make pattern relative to
72
- # root.
73
- # EDGE CASE: This also holds for a single pattern with a
74
- # trailing slash (e.g. dir/).
75
- if pattern_segs[0] != '**'
76
- pattern_segs.insert(0, '**')
38
+ # A pattern starting with an exclamation mark ('!') negates the
39
+ # pattern (exclude instead of include). Escape the exclamation
40
+ # mark with a back-slash to match a literal exclamation mark
41
+ # (i.e., '\!').
42
+ if pattern.start_with?('!')
43
+ @inclusive = false
44
+ # Remove leading exclamation mark.
45
+ pattern = pattern[1..]
46
+ else
47
+ @inclusive = true
77
48
  end
78
- end
79
-
80
- # A pattern ending with a slash ('/') will match all descendant
81
- # paths of if it is a directory but not if it is a regular file.
82
- # This is equivilent to "{pattern}/**". So, set last segment to
83
- # double asterisks to include all descendants.
84
- if pattern_segs[-1].empty? && pattern_segs.length > 1
85
- pattern_segs[-1] = '**'
86
- end
87
49
 
88
- # Handle platforms with backslash separated paths
89
- if File::SEPARATOR == '\\'
90
- path_sep = '\\\\'
91
- else
92
- path_sep = '/'
93
- end
50
+ # Remove leading back-slash escape for escaped hash ('#') or
51
+ # exclamation mark ('!').
52
+ pattern = pattern[1..] if pattern.start_with?('\\')
53
+
54
+ # Split pattern into segments. -1 to allow trailing slashes.
55
+ pattern_segs = pattern.split('/', -1)
56
+
57
+ # Normalize pattern to make processing easier.
58
+
59
+ # A pattern beginning with a slash ('/') will only match paths
60
+ # directly on the root directory instead of any descendant
61
+ # paths. So, remove empty first segment to make pattern relative
62
+ # to root.
63
+ if pattern_segs[0].empty?
64
+ pattern_segs.shift
65
+ elsif pattern_segs.length == 1 ||
66
+ pattern_segs.length == 2 && pattern_segs[-1].empty?
67
+ # A pattern without a beginning slash ('/') will match any
68
+ # descendant path. This is equivilent to "**/{pattern}". So,
69
+ # prepend with double-asterisks to make pattern relative to
70
+ # root.
71
+ # EDGE CASE: This also holds for a single pattern with a
72
+ # trailing slash (e.g. dir/).
73
+ pattern_segs.insert(0, '**') if pattern_segs[0] != '**'
74
+ end
94
75
 
76
+ # A pattern ending with a slash ('/') will match all descendant
77
+ # paths of if it is a directory but not if it is a regular file.
78
+ # This is equivilent to "{pattern}/**". So, set last segment to
79
+ # double asterisks to include all descendants.
80
+ pattern_segs[-1] = '**' if pattern_segs[-1].empty? && pattern_segs.length > 1
81
+
82
+ # Handle platforms with backslash separated paths
83
+ path_sep = if File::SEPARATOR == '\\'
84
+ '\\\\'
85
+ else
86
+ '/'
87
+ end
88
+
89
+ # Build regular expression from pattern.
90
+ regex = '^'
91
+ need_slash = false
92
+ regex_end = pattern_segs.size - 1
93
+ pattern_segs.each_index do |i|
94
+ seg = pattern_segs[i]
95
+
96
+ case seg
97
+ when '**'
98
+ # A pattern consisting solely of double-asterisks ('**')
99
+ # will match every path.
100
+ if i == 0 && i == regex_end
101
+ regex.concat('.+')
102
+
103
+ # A normalized pattern beginning with double-asterisks
104
+ # ('**') will match any leading path segments.
105
+ elsif i == 0
106
+ regex.concat("(?:.+#{path_sep})?")
107
+ need_slash = false
108
+
109
+ # A normalized pattern ending with double-asterisks ('**')
110
+ # will match any trailing path segments.
111
+ elsif i == regex_end
112
+ regex.concat("#{path_sep}.*")
113
+
114
+ # A pattern with inner double-asterisks ('**') will match
115
+ # multiple (or zero) inner path segments.
116
+ else
117
+ regex.concat("(?:#{path_sep}.+)?")
118
+ need_slash = true
119
+ end
120
+
121
+ # Match single path segment.
122
+ when '*'
123
+ regex.concat(path_sep) if need_slash
124
+
125
+ regex.concat("[^#{path_sep}]+")
126
+ need_slash = true
95
127
 
96
- # Build regular expression from pattern.
97
- regex = '^'
98
- need_slash = false
99
- regex_end = pattern_segs.size - 1
100
- pattern_segs.each_index do |i|
101
- seg = pattern_segs[i]
102
-
103
- if seg == '**'
104
- # A pattern consisting solely of double-asterisks ('**')
105
- # will match every path.
106
- if i == 0 && i == regex_end
107
- regex.concat('.+')
108
-
109
- # A normalized pattern beginning with double-asterisks
110
- # ('**') will match any leading path segments.
111
- elsif i == 0
112
- regex.concat("(?:.+#{path_sep})?")
113
- need_slash = false
114
-
115
- # A normalized pattern ending with double-asterisks ('**')
116
- # will match any trailing path segments.
117
- elsif i == regex_end
118
- regex.concat("#{path_sep}.*")
119
-
120
- # A pattern with inner double-asterisks ('**') will match
121
- # multiple (or zero) inner path segments.
122
128
  else
123
- regex.concat("(?:#{path_sep}.+)?")
124
- need_slash = true
125
- end
129
+ # Match segment glob pattern.
130
+ regex.concat(path_sep) if need_slash
126
131
 
127
- # Match single path segment.
128
- elsif seg == '*'
129
- if need_slash
130
- regex.concat(path_sep)
131
- end
132
+ regex.concat(translate_segment_glob(seg))
132
133
 
133
- regex.concat("[^#{path_sep}]+")
134
- need_slash = true
134
+ if i == regex_end && @inclusive
135
+ # A pattern ending without a slash ('/') will match a file
136
+ # or a directory (with paths underneath it).
137
+ # e.g. foo matches: foo, foo/bar, foo/bar/baz, etc.
138
+ # EDGE CASE: However, this does not hold for exclusion cases
139
+ # according to `git check-ignore` (v2.4.1).
140
+ regex.concat("(?:#{path_sep}.*)?")
141
+ end
135
142
 
136
- else
137
- # Match segment glob pattern.
138
- if need_slash
139
- regex.concat(path_sep)
143
+ need_slash = true
140
144
  end
145
+ end
141
146
 
142
- regex.concat(translate_segment_glob(seg))
143
-
144
- if i == regex_end && @inclusive
145
- # A pattern ending without a slash ('/') will match a file
146
- # or a directory (with paths underneath it).
147
- # e.g. foo matches: foo, foo/bar, foo/bar/baz, etc.
148
- # EDGE CASE: However, this does not hold for exclusion cases
149
- # according to `git check-ignore` (v2.4.1).
150
- regex.concat("(?:#{path_sep}.*)?")
151
- end
147
+ regex.concat('$')
148
+ super(regex)
152
149
 
153
- need_slash = true
154
- end
150
+ # Copy original pattern
151
+ @pattern = original_pattern.dup
155
152
  end
156
-
157
- regex.concat('$')
158
- super(regex)
159
153
  end
160
- end
161
154
 
162
- def match(path)
163
- super(path)
164
- end
165
-
166
- def translate_segment_glob(pattern)
167
- """
168
- Translates the glob pattern to a regular expression. This is used in
169
- the constructor to translate a path segment glob pattern to its
170
- corresponding regular expression.
171
-
172
- *pattern* (``str``) is the glob pattern.
173
-
174
- Returns the regular expression (``str``).
175
- """
176
- # NOTE: This is derived from `fnmatch.translate()` and is similar to
177
- # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
178
-
179
- escape = false
180
- regex = ''
181
- i = 0
182
-
183
- while i < pattern.size
184
- # Get next character.
185
- char = pattern[i].chr
186
- i += 1
187
-
188
- # Escape the character.
189
- if escape
190
- escape = false
191
- regex += Regexp.escape(char)
192
-
193
- # Escape character, escape next character.
194
- elsif char == '\\'
195
- escape = true
196
-
197
- # Multi-character wildcard. Match any string (except slashes),
198
- # including an empty string.
199
- elsif char == '*'
200
- regex += '[^/]*'
201
-
202
- # Single-character wildcard. Match any single character (except
203
- # a slash).
204
- elsif char == '?'
205
- regex += '[^/]'
206
-
207
- # Braket expression wildcard. Except for the beginning
208
- # exclamation mark, the whole braket expression can be used
209
- # directly as regex but we have to find where the expression
210
- # ends.
211
- # - "[][!]" matchs ']', '[' and '!'.
212
- # - "[]-]" matchs ']' and '-'.
213
- # - "[!]a-]" matchs any character except ']', 'a' and '-'.
214
- elsif char == '['
215
- j = i
216
- # Pass brack expression negation.
217
- if j < pattern.size && pattern[j].chr == '!'
218
- j += 1
219
- end
220
-
221
- # Pass first closing braket if it is at the beginning of the
222
- # expression.
223
- if j < pattern.size && pattern[j].chr == ']'
224
- j += 1
225
- end
226
-
227
- # Find closing braket. Stop once we reach the end or find it.
228
- while j < pattern.size && pattern[j].chr != ']'
229
- j += 1
230
- end
231
-
232
-
233
- if j < pattern.size
234
- expr = '['
235
-
236
- # Braket expression needs to be negated.
237
- if pattern[i].chr == '!'
238
- expr += '^'
239
- i += 1
240
-
241
- # POSIX declares that the regex braket expression negation
242
- # "[^...]" is undefined in a glob pattern. Python's
243
- # `fnmatch.translate()` escapes the caret ('^') as a
244
- # literal. To maintain consistency with undefined behavior,
245
- # I am escaping the '^' as well.
246
- elsif pattern[i].chr == '^'
247
- expr += '\\^'
248
- i += 1
249
- end
250
-
251
- # Escape brackets contained within pattern
252
- if pattern[i].chr == ']' && i != j
253
- expr += '\]'
254
- i += 1
155
+ def translate_segment_glob(pattern)
156
+ # Translates the glob pattern to a regular expression. This is used in
157
+ # the constructor to translate a path segment glob pattern to its
158
+ # corresponding regular expression.
159
+ #
160
+ # *pattern* (``str``) is the glob pattern.
161
+ #
162
+ # Returns the regular expression (``str``).
163
+ #
164
+ # NOTE: This is derived from `fnmatch.translate()` and is similar to
165
+ # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
166
+
167
+ escape = false
168
+ regex = ''
169
+ i = 0
170
+
171
+ while i < pattern.size
172
+ # Get next character.
173
+ char = pattern[i].chr
174
+ i += 1
175
+
176
+ # Escape the character.
177
+ if escape
178
+ escape = false
179
+ regex += Regexp.escape(char)
180
+
181
+ # Escape character, escape next character.
182
+ elsif char == '\\'
183
+ escape = true
184
+
185
+ # Multi-character wildcard. Match any string (except slashes),
186
+ # including an empty string.
187
+ elsif char == '*'
188
+ regex += '[^/]*'
189
+
190
+ # Single-character wildcard. Match any single character (except
191
+ # a slash).
192
+ elsif char == '?'
193
+ regex += '[^/]'
194
+
195
+ # Braket expression wildcard. Except for the beginning
196
+ # exclamation mark, the whole braket expression can be used
197
+ # directly as regex but we have to find where the expression
198
+ # ends.
199
+ # - "[][!]" matchs ']', '[' and '!'.
200
+ # - "[]-]" matchs ']' and '-'.
201
+ # - "[!]a-]" matchs any character except ']', 'a' and '-'.
202
+ elsif char == '['
203
+ j = i
204
+ # Pass brack expression negation.
205
+ j += 1 if j < pattern.size && pattern[j].chr == '!'
206
+
207
+ # Pass first closing braket if it is at the beginning of the
208
+ # expression.
209
+ j += 1 if j < pattern.size && pattern[j].chr == ']'
210
+
211
+ # Find closing braket. Stop once we reach the end or find it.
212
+ j += 1 while j < pattern.size && pattern[j].chr != ']'
213
+
214
+ if j < pattern.size
215
+ expr = '['
216
+
217
+ # Braket expression needs to be negated.
218
+ case pattern[i].chr
219
+ when '!'
220
+ expr += '^'
221
+ i += 1
222
+
223
+ # POSIX declares that the regex braket expression negation
224
+ # "[^...]" is undefined in a glob pattern. Python's
225
+ # `fnmatch.translate()` escapes the caret ('^') as a
226
+ # literal. To maintain consistency with undefined behavior,
227
+ # I am escaping the '^' as well.
228
+ when '^'
229
+ expr += '\\^'
230
+ i += 1
231
+ end
232
+
233
+ # Escape brackets contained within pattern
234
+ if pattern[i].chr == ']' && i != j
235
+ expr += '\]'
236
+ i += 1
237
+ end
238
+
239
+ # Build regex braket expression. Escape slashes so they are
240
+ # treated as literal slashes by regex as defined by POSIX.
241
+ expr += pattern[i..j].sub('\\', '\\\\')
242
+
243
+ # Add regex braket expression to regex result.
244
+ regex += expr
245
+
246
+ # Found end of braket expression. Increment j to be one past
247
+ # the closing braket:
248
+ #
249
+ # [...]
250
+ # ^ ^
251
+ # i j
252
+ #
253
+ j += 1
254
+ # Set i to one past the closing braket.
255
+ i = j
256
+
257
+ # Failed to find closing braket, treat opening braket as a
258
+ # braket literal instead of as an expression.
259
+ else
260
+ regex += '\['
255
261
  end
256
262
 
257
-
258
- # Build regex braket expression. Escape slashes so they are
259
- # treated as literal slashes by regex as defined by POSIX.
260
- expr += pattern[i..j].sub('\\', '\\\\')
261
-
262
- # Add regex braket expression to regex result.
263
- regex += expr
264
-
265
- # Found end of braket expression. Increment j to be one past
266
- # the closing braket:
267
- #
268
- # [...]
269
- # ^ ^
270
- # i j
271
- #
272
- j += 1
273
- # Set i to one past the closing braket.
274
- i = j
275
-
276
- # Failed to find closing braket, treat opening braket as a
277
- # braket literal instead of as an expression.
263
+ # Regular character, escape it for regex.
278
264
  else
279
- regex += '\['
265
+ regex << Regexp.escape(char)
280
266
  end
281
-
282
- # Regular character, escape it for regex.
283
- else
284
- regex << Regexp.escape(char)
285
267
  end
286
- end
287
268
 
288
- regex
289
- end
269
+ regex
270
+ end
290
271
 
291
- def inclusive?
292
- @inclusive
272
+ def inclusive?
273
+ @inclusive
274
+ end
293
275
  end
294
276
  end
data/lib/pathspec/regexspec.rb CHANGED
@@ -1,17 +1,21 @@
1
1
  require 'pathspec/spec'
2
2
 
3
- class RegexSpec < Spec
4
- def initialize(regex)
5
- @regex = Regexp.compile regex
3
+ class PathSpec
4
+ # Simple regex-based spec
5
+ class RegexSpec < Spec
6
+ def initialize(pattern)
7
+ @pattern = pattern.dup
8
+ @regex = Regexp.compile pattern
6
9
 
7
- super
8
- end
10
+ super
11
+ end
9
12
 
10
- def inclusive?
11
- true
12
- end
13
+ def inclusive?
14
+ true
15
+ end
13
16
 
14
- def match(path)
15
- @regex.match(path) if @regex
17
+ def match(path)
18
+ @regex&.match(path)
19
+ end
16
20
  end
17
21
  end
data/lib/pathspec/spec.rb CHANGED
@@ -1,14 +1,20 @@
1
- class Spec
2
- attr_reader :regex
1
+ class PathSpec
2
+ # Abstract spec
3
+ class Spec
4
+ attr_reader :regex, :pattern
3
5
 
4
- def initialize(*_)
5
- end
6
+ def initialize(*_); end
6
7
 
7
- def match(files)
8
- raise "Unimplemented"
9
- end
8
+ def match(files)
9
+ raise 'Unimplemented'
10
+ end
11
+
12
+ def inclusive?
13
+ true
14
+ end
10
15
 
11
- def inclusive?
12
- true
16
+ def to_s
17
+ @pattern
18
+ end
13
19
  end
14
20
  end