picoglob 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +23 -0
- data/LICENSE +21 -0
- data/README.md +112 -0
- data/lib/picoglob/compiler.rb +333 -0
- data/lib/picoglob/version.rb +5 -0
- data/lib/picoglob.rb +91 -0
- metadata +86 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ac02c0c9ac62540cbd3e42e65acc4214842d0cc595644e13679607004da0dfa9
|
|
4
|
+
data.tar.gz: 3c8c99572698165a76c90983e031b6105e44e7946dd15c71ed8c192c01961823
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: e287bbdf2f7748715de21feb07fcb3edeadf36656a1d47b59e9e0e62080864d65aac65135f71a1df360f042d7ae93f993defe3197a5ab7e163cd97dddd402180
|
|
7
|
+
data.tar.gz: 5311aec1c0031f231b28a359e47efca6c11e302451c389d4a352bf07f77a657f70a5dc2e1a89a2fa3906cde8a99fdd20bdace3169fa90cb95f963d723d444f86
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-05-28
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Initial release.
|
|
14
|
+
- `Picoglob.new(pattern, **opts)` -> `Picoglob::Matcher` (compile once, match many).
|
|
15
|
+
- `Picoglob.match?`, `Picoglob.to_regexp`, `Picoglob.filter` convenience methods.
|
|
16
|
+
- `Matcher#match?`, `#===`, `#to_regexp`, `#filter`, `#pattern`, `#regexp`.
|
|
17
|
+
- Glob syntax: `*`, `**`, `**/`, `?`, `[...]`, `[!...]`/`[^...]`, `{a,b}`,
|
|
18
|
+
`{1..n}` ranges, and extglobs `@()`, `?()`, `*()`, `+()`, `!()`, plus escaping.
|
|
19
|
+
- Options: `separator`, `dot`, `extglob`, `nocase`.
|
|
20
|
+
- `Picoglob::ParseError` for malformed patterns.
|
|
21
|
+
|
|
22
|
+
[Unreleased]: https://github.com/tachyurgy/picoglob/compare/v0.1.0...HEAD
|
|
23
|
+
[0.1.0]: https://github.com/tachyurgy/picoglob/releases/tag/v0.1.0
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Levelbrook Consulting
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# picoglob
|
|
2
|
+
|
|
3
|
+
Compile **bash-style glob patterns into reusable Ruby `Regexp`s** so you can
|
|
4
|
+
match *arbitrary strings* — S3 keys, routes, log lines, branch names, queue
|
|
5
|
+
topics — not just files on disk.
|
|
6
|
+
|
|
7
|
+
It's the missing Ruby counterpart to JavaScript's
|
|
8
|
+
[picomatch](https://github.com/micromatch/picomatch) / minimatch. Ruby ships
|
|
9
|
+
`File.fnmatch` and `Dir.glob`, but neither hands you a reusable `Regexp`, and
|
|
10
|
+
`File.fnmatch`'s brace/extglob support is awkward to use off the filesystem.
|
|
11
|
+
|
|
12
|
+
Pure Ruby, zero dependencies.
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
# Gemfile
|
|
18
|
+
gem "picoglob"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
```sh
|
|
22
|
+
gem install picoglob
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
|
|
27
|
+
```ruby
|
|
28
|
+
require "picoglob"
|
|
29
|
+
|
|
30
|
+
# One-shot
|
|
31
|
+
Picoglob.match?("src/**/*.{rb,erb}", "src/app/models/user.rb") # => true
|
|
32
|
+
Picoglob.match?("*.rb", "lib/foo.rb") # => false (single * doesn't cross "/")
|
|
33
|
+
|
|
34
|
+
# Compile once, match many (recommended in hot paths)
|
|
35
|
+
g = Picoglob.new("logs/*.log")
|
|
36
|
+
g.match?("logs/app.log") # => true
|
|
37
|
+
g.match?("logs/2026/x.log") # => false
|
|
38
|
+
|
|
39
|
+
# Get the underlying Regexp
|
|
40
|
+
Picoglob.to_regexp("*.rb") # => /\A(?:(?!\.)[^\/]*)\.rb\z/
|
|
41
|
+
|
|
42
|
+
# Filter a list
|
|
43
|
+
Picoglob.filter("**/*.rb", ["a.rb", "lib/b.rb", "c.txt"]) # => ["a.rb", "lib/b.rb"]
|
|
44
|
+
|
|
45
|
+
# Use it in a case/when (Matcher implements ===)
|
|
46
|
+
case key
|
|
47
|
+
when Picoglob.new("uploads/**/*.jpg") then handle_image
|
|
48
|
+
end
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Supported syntax
|
|
52
|
+
|
|
53
|
+
| Pattern | Meaning |
|
|
54
|
+
|---|---|
|
|
55
|
+
| `*` | any run of non-separator characters |
|
|
56
|
+
| `**` | globstar — any run of characters, *including* separators |
|
|
57
|
+
| `**/` | zero or more whole path segments (so `src/**/*.rb` also matches `src/foo.rb`) |
|
|
58
|
+
| `?` | exactly one non-separator character |
|
|
59
|
+
| `[abc]` `[a-z]` | character class |
|
|
60
|
+
| `[!abc]` `[^abc]` | negated character class |
|
|
61
|
+
| `{a,b,c}` | alternation — one of the alternatives |
|
|
62
|
+
| `{1..5}` | numeric range expansion |
|
|
63
|
+
| `@(a\|b)` | exactly one of (extglob) |
|
|
64
|
+
| `?(a\|b)` | zero or one of (extglob) |
|
|
65
|
+
| `*(a\|b)` | zero or more of (extglob) |
|
|
66
|
+
| `+(a\|b)` | one or more of (extglob) |
|
|
67
|
+
| `!(a\|b)` | anything except (extglob) |
|
|
68
|
+
| `\*` | a literal `*` (escape any metacharacter) |
|
|
69
|
+
|
|
70
|
+
Braces and extglobs nest, and compile recursively, so things like
|
|
71
|
+
`{a,b{c,d}}` and `image.@(jp?(e)g\|png)` work.
|
|
72
|
+
|
|
73
|
+
## Options
|
|
74
|
+
|
|
75
|
+
```ruby
|
|
76
|
+
Picoglob.new(pattern,
|
|
77
|
+
separator: "/", # the char that * / ? won't cross (use "." for dotted names, etc.)
|
|
78
|
+
dot: false, # when false, wildcards won't match a leading "." (shell behavior)
|
|
79
|
+
extglob: true, # enable @()/?()/*()/+()/!() extglobs
|
|
80
|
+
nocase: false) # case-insensitive matching
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
```ruby
|
|
84
|
+
Picoglob.match?("*.RB", "foo.rb", nocase: true) # => true
|
|
85
|
+
Picoglob.match?("*", ".hidden") # => false (dotfile protected)
|
|
86
|
+
Picoglob.match?("*", ".hidden", dot: true) # => true
|
|
87
|
+
Picoglob.match?("a.*.c", "a.b.c", separator: ".") # => true
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Why not just use `File.fnmatch`?
|
|
91
|
+
|
|
92
|
+
`File.fnmatch` is fine for matching against the filesystem, but:
|
|
93
|
+
|
|
94
|
+
- it doesn't give you a reusable `Regexp` to combine, inspect, or reuse;
|
|
95
|
+
- brace expansion requires `File::FNM_EXTGLOB` and still can't be turned into a
|
|
96
|
+
pattern you keep;
|
|
97
|
+
- there's no extglob (`@()`, `+()`, `!()`, …) support;
|
|
98
|
+
- the dotfile / separator rules aren't configurable per call.
|
|
99
|
+
|
|
100
|
+
`picoglob` gives you a compiled `Regexp` you own, with picomatch-style semantics,
|
|
101
|
+
that works on any string.
|
|
102
|
+
|
|
103
|
+
## Development
|
|
104
|
+
|
|
105
|
+
```sh
|
|
106
|
+
bundle install
|
|
107
|
+
bundle exec rake test
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
MIT © Levelbrook Consulting. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Picoglob
  # Raised when a glob pattern is malformed (e.g. an unbalanced brace or bracket).
  class ParseError < StandardError; end

  # Compiles a bash-style glob pattern into an anchored Ruby Regexp.
  #
  # Supported syntax:
  #   *          any run of non-separator characters
  #   **         any run of characters, including separators (globstar)
  #   **/        zero or more whole path segments
  #   ?          any single non-separator character
  #   [abc] [a-z] character class; [!...] or [^...] negates
  #   {a,b,c}    brace alternation (one of the comma-separated alternatives)
  #   {1..3}     numeric brace range expansion -> {1,2,3}
  #   @(a|b)     exactly one of the patterns (extglob)
  #   ?(a|b)     zero or one of the patterns (extglob)
  #   *(a|b)     zero or more of the patterns (extglob)
  #   +(a|b)     one or more of the patterns (extglob)
  #   !(a|b)     anything except the patterns (extglob)
  #   \x         escape the next character (literal)
  #
  # Known limitations:
  #   * `!(...)` only rejects a segment that is *entirely* one of the listed
  #     patterns (i.e. the pattern is followed by a separator or the end of the
  #     string). A suffix after the extglob, as in `!(a).rb`, is not taken into
  #     account by the negation.
  #   * The leading-dot rule is not applied inside `{}` / extglob alternatives.
  #
  # @api private
  class Compiler
    # Characters that introduce an extglob when immediately followed by "(".
    EXTGLOB_PREFIXES = "@?*+!".freeze
    # "Any character, newline included" -- keeps `**` consistent with `*`,
    # whose negated character class already matches newlines.
    ANY_CHAR = "(?m:.)".freeze
    # Lookahead that forbids a "." at the current position (leading-dot rule).
    DOT_GUARD = "(?!\\.)".freeze

    # @param separator [String] the path separator that `*`/`?` will not cross
    # @param dot [Boolean] when false, a leading `.` must be matched explicitly
    #   (a leading `*`/`?`/`[` will not match it), mirroring shell globbing
    # @param extglob [Boolean] enable extglob constructs (@/?/*/+/! followed by `(`)
    # @param nocase [Boolean] case-insensitive matching
    def initialize(separator: "/", dot: false, extglob: true, nocase: false)
      @sep = separator
      @dot = dot
      @extglob = extglob
      @nocase = nocase
    end

    # Compile +pattern+ into a Regexp anchored with \A ... \z.
    #
    # @param pattern [String]
    # @return [Regexp]
    # @raise [ParseError] on a dangling escape or an unterminated
    #   character class, brace or extglob
    def compile(pattern)
      @chars = pattern.chars
      @pos = 0
      body = parse_sequence(top_level: true)
      raise ParseError, "unexpected #{current.inspect}" unless eof?

      Regexp.new("\\A#{body}\\z", @nocase ? Regexp::IGNORECASE : nil)
    end

    private

    # The separator, escaped for use in a regex.
    def sep_re
      Regexp.escape(@sep)
    end

    # A single "non-separator" character class fragment, e.g. "[^/]".
    def not_sep
      "[^#{sep_re}]"
    end

    # The character at the cursor, or nil at end of input.
    def current
      @chars[@pos]
    end

    # The character +n+ positions after the cursor, or nil past the end.
    def peek(n = 1)
      @chars[@pos + n]
    end

    # Return the character at the cursor and move the cursor forward.
    def advance
      c = @chars[@pos]
      @pos += 1
      c
    end

    def eof?
      @pos >= @chars.length
    end

    # The leading-dot lookahead when it applies at this position, else "".
    def dot_guard(at_segment_start)
      at_segment_start && !@dot ? DOT_GUARD : ""
    end

    # Parse a run of glob tokens until a terminator. When +stop+ is given
    # (a set of characters), parsing stops *before* consuming a terminator.
    def parse_sequence(stop: nil, top_level: false)
      out = +""
      # Set by parse_globstar when it swallows the separator of a "**/".
      # The emitted group ends in ")*", so the string-suffix check in
      # just_after_sep? cannot see that the next token starts a segment.
      @after_globstar_sep = false
      until eof?
        break if stop&.include?(current)

        at_start = out.empty? || just_after_sep?(out) || @after_globstar_sep
        @after_globstar_sep = false
        out << parse_token(at_segment_start: at_start, top_level: top_level)
      end
      out
    end

    # Did the regex built so far end at a segment boundary? Used for the
    # leading-dot rule.
    def just_after_sep?(built)
      built.end_with?(sep_re)
    end

    # Parse one token at the cursor and return its regex source.
    # +top_level+ is accepted for call-site symmetry; it does not currently
    # change how a token is compiled.
    def parse_token(at_segment_start:, top_level:)
      c = current

      # extglob: a prefix char immediately followed by '('
      return parse_extglob if @extglob && EXTGLOB_PREFIXES.include?(c) && peek == "("

      case c
      when "\\"
        advance
        nxt = advance
        raise ParseError, "dangling escape" if nxt.nil?

        Regexp.escape(nxt)
      when "*"
        parse_star(at_segment_start: at_segment_start)
      when "?"
        advance
        "#{dot_guard(at_segment_start)}#{not_sep}"
      when "["
        # A leading bracket expression must not match a leading dot either.
        "#{dot_guard(at_segment_start)}#{parse_class}"
      when "{"
        parse_brace
      else
        # Includes "}", ")", "|" and ",": outside their construct they are
        # plain literals, so patterns like "a,b" or "100%}" don't explode.
        Regexp.escape(advance)
      end
    end

    # `*` (single segment) or `**` (globstar, delegated to parse_globstar).
    def parse_star(at_segment_start:)
      advance # consume first '*'
      if current == "*"
        # globstar: consume all consecutive '*'
        advance while current == "*"
        return parse_globstar(at_segment_start: at_segment_start)
      end
      return "(?:#{DOT_GUARD}#{not_sep}*)" if at_segment_start && !@dot

      "#{not_sep}*"
    end

    # Globstar (`**`) matches across separators. The special, shell-standard
    # form is `**/`, which matches zero or more *whole* path segments -- so
    # `src/**/*.rb` also matches `src/foo.rb`. To make "zero segments" work we
    # swallow the trailing separator here and emit a group that can match
    # nothing.
    def parse_globstar(at_segment_start:)
      if at_segment_start && current == @sep
        advance # consume the '/' that follows '**'
        @after_globstar_sep = true # the next token starts a fresh segment
        seg = @dot ? "#{not_sep}+" : "#{DOT_GUARD}#{not_sep}*"
        # zero or more "segment/" groups; matches the empty string too
        return "(?:#{seg}#{sep_re})*"
      end
      return "#{ANY_CHAR}*" unless at_segment_start && !@dot

      # Leading-dot rule for a bare globstar: the match may not start with a
      # dot and may not step into a segment that starts with one. Dots that
      # are not at the start of a segment (as in "a.rb") are fine.
      "#{DOT_GUARD}(?:(?!#{sep_re}\\.)#{ANY_CHAR})*"
    end

    # Parse a bracket expression starting at '[' and return the equivalent
    # Ruby character class.
    #
    # @raise [ParseError] when the class is not closed or ends in a bare "\"
    def parse_class
      advance # '['
      negate = false
      if current == "!" || current == "^"
        negate = true
        advance
      end
      body = +""
      # A ']' immediately after the (optional) negation is a literal ']'.
      if current == "]"
        body << "\\]"
        advance
      end
      until eof? || current == "]"
        ch = advance
        body << if ch == "\\"
                  # explicit escape inside the class: take the next char literally
                  nxt = advance
                  raise ParseError, "dangling escape in character class" if nxt.nil?

                  escape_in_class(nxt)
                elsif ch == "-"
                  # keep an unescaped '-' so ranges like 0-9 / a-z are preserved
                  "-"
                else
                  escape_in_class(ch)
                end
      end
      raise ParseError, "unterminated character class" if eof?

      advance # ']'
      "[#{negate ? '^' : ''}#{body}]"
    end

    # Escape a single character so it is a literal inside a Ruby regex
    # character class. Besides ']', '\\' and '^', Ruby also gives meaning to
    # '[' (nested / POSIX classes) and '&' ('&&' intersection) there. '-' is
    # escaped too: the caller emits an *unescaped* '-' itself, so a '-' that
    # reaches this method came from an explicit `\-` and must stay literal.
    def escape_in_class(ch)
      case ch
      when "]", "[", "\\", "^", "-", "&"
        "\\#{ch}"
      else
        ch
      end
    end

    # Scan up to the +close+ that balances an already-consumed +open+ and
    # return the raw text of each top-level alternative. Nested open/close
    # pairs are kept verbatim, and a backslash escape is copied together with
    # the character it protects so an escaped delimiter never splits.
    #
    # @param what [String] construct name used in the error message
    # @raise [ParseError] when input ends before the closing character
    def split_alternatives(open, close, delimiter, what)
      alts = []
      buf = +""
      depth = 0
      loop do
        raise ParseError, "unterminated #{what}" if eof?

        c = advance
        case c
        when "\\"
          buf << c
          buf << advance unless eof?
        when close
          if depth.zero?
            alts << buf
            return alts
          end
          depth -= 1
          buf << c
        when delimiter
          if depth.zero?
            alts << buf
            buf = +""
          else
            buf << c
          end
        when open
          depth += 1
          buf << c
        else
          buf << c
        end
      end
    end

    # {a,b,c}  -> (?:a|b|c)
    # {1..5}   -> (?:1|2|3|4|5)
    def parse_brace
      advance # '{'
      range = try_numeric_range
      return range if range

      compiled = split_alternatives("{", "}", ",", "brace").map { |alt| compile_fragment(alt) }
      # A brace with no comma (e.g. "{foo}") is treated literally by bash.
      return "\\{#{compiled.first}\\}" if compiled.length == 1

      "(?:#{compiled.join('|')})"
    end

    # Try to parse "{m..n}" starting just after '{'. Restores position on failure.
    #
    # @return [String, nil] alternation of every integer in the range
    #   (descending when m > n), or nil when this is not a numeric range
    def try_numeric_range
      start = @pos
      num1 = +""
      num1 << advance while current&.match?(/\d/)
      if current == "." && peek == "." && !num1.empty?
        advance
        advance
        num2 = +""
        num2 << advance while current&.match?(/\d/)
        if current == "}" && !num2.empty?
          advance
          lo = num1.to_i
          hi = num2.to_i
          values = lo <= hi ? (lo..hi).to_a : (hi..lo).to_a.reverse
          return "(?:#{values.join('|')})"
        end
      end
      @pos = start
      nil
    end

    # extglob: PREFIX( pat | pat | ... )
    def parse_extglob
      prefix = advance # @ ? * + !
      advance # '('
      alts = split_alternatives("(", ")", "|", "extglob")

      inner = "(?:#{alts.map { |alt| compile_fragment(alt) }.join('|')})"
      case prefix
      when "@" then inner
      when "?" then "#{inner}?"
      when "*" then "#{inner}*"
      when "+" then "#{inner}+"
      when "!"
        # Reject a segment that is exactly one of the alternatives: the
        # alternative must be followed by a separator or the end of the
        # *string* (\z, not the per-line "$").
        "(?:(?!#{inner}(?:#{sep_re}|\\z))#{not_sep}*)"
      end
    end

    # Compile a nested fragment (alternative body, extglob branch) using the
    # same options. We don't apply the leading-dot rule inside fragments.
    def compile_fragment(str)
      sub = Compiler.new(separator: @sep, dot: true, extglob: @extglob, nocase: @nocase)
      sub.compile_body(str)
    end

    protected

    # Compile +str+ to a regex body string (no anchors). Used for nested
    # fragments so we get full recursive glob support inside {} and extglobs.
    def compile_body(str)
      @chars = str.chars
      @pos = 0
      body = parse_sequence
      raise ParseError, "unexpected #{current.inspect}" unless eof?

      body
    end
  end
end
|
data/lib/picoglob.rb
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "picoglob/version"
|
|
4
|
+
require_relative "picoglob/compiler"
|
|
5
|
+
|
|
6
|
+
# Picoglob compiles bash-style glob patterns into Ruby Regexps so you can match
|
|
7
|
+
# *arbitrary strings* — S3 keys, routes, log lines, branch names — not just
|
|
8
|
+
# files on disk.
|
|
9
|
+
#
|
|
10
|
+
# It's the missing Ruby counterpart to JS's picomatch / minimatch. Ruby ships
|
|
11
|
+
# `File.fnmatch` and `Dir.glob`, but neither gives you a reusable `Regexp`, and
|
|
12
|
+
# `File.fnmatch` has limited brace/extglob support that's awkward to use off the
|
|
13
|
+
# filesystem.
|
|
14
|
+
#
|
|
15
|
+
# @example One-shot match
|
|
16
|
+
# Picoglob.match?("src/**/*.{rb,erb}", "src/app/models/user.rb") # => true
|
|
17
|
+
#
|
|
18
|
+
# @example Compile once, match many (fast)
|
|
19
|
+
# g = Picoglob.new("logs/*.log")
|
|
20
|
+
# g.match?("logs/app.log") # => true
|
|
21
|
+
# g.match?("logs/2026/x.log") # => false (single * doesn't cross "/")
|
|
22
|
+
#
|
|
23
|
+
# @example Filter a list
|
|
24
|
+
# Picoglob.filter("**/*.rb", ["a.rb", "lib/b.rb", "c.txt"]) # => ["a.rb", "lib/b.rb"]
|
|
25
|
+
#
|
|
26
|
+
# @example Extglob
|
|
27
|
+
# Picoglob.match?("image.+(jpg|png)", "image.png") # => true
|
|
28
|
+
module Picoglob
  class << self
    # Build a reusable matcher.
    #
    # @param pattern [String] the glob pattern
    # @param opts [Hash] keyword options forwarded to {Matcher#initialize}:
    #   +separator+ (default "/"), +dot+ (default false),
    #   +extglob+ (default true) and +nocase+ (default false)
    # @return [Picoglob::Matcher]
    def new(pattern, **opts)
      Matcher.new(pattern, **opts)
    end

    # One-shot check: compile +pattern+ and test +string+ against it.
    #
    # @return [Boolean]
    def match?(pattern, string, **opts)
      new(pattern, **opts).match?(string)
    end

    # One-shot compile: the Regexp equivalent of +pattern+.
    #
    # @return [Regexp]
    def to_regexp(pattern, **opts)
      new(pattern, **opts).to_regexp
    end

    # One-shot filter: the members of +strings+ that match +pattern+.
    #
    # @return [Array<String>]
    def filter(pattern, strings, **opts)
      new(pattern, **opts).filter(strings)
    end
  end

  # A glob compiled to a Regexp at construction time, so the same instance
  # can be matched against many strings without recompiling.
  class Matcher
    # @return [String] the glob pattern this matcher was built from
    attr_reader :pattern
    # @return [Regexp] the regular expression the pattern compiled to
    attr_reader :regexp

    # @param pattern [String] the glob pattern
    # @param separator [String] path separator that `*`/`?` won't cross
    # @param dot [Boolean] let wildcards match a leading dot
    # @param extglob [Boolean] enable extglob syntax
    # @param nocase [Boolean] case-insensitive matching
    def initialize(pattern, separator: "/", dot: false, extglob: true, nocase: false)
      compiler = Compiler.new(separator: separator, dot: dot, extglob: extglob, nocase: nocase)
      @pattern = pattern
      @regexp = compiler.compile(pattern)
    end

    # @param string [String]
    # @return [Boolean] whether +string+ matches the compiled glob
    def match?(string)
      regexp.match?(string)
    end
    # Lets a Matcher be used directly as a `when` clause.
    alias_method :===, :match?

    # @return [Regexp] the compiled regular expression
    def to_regexp
      regexp
    end

    # @param strings [Enumerable<String>]
    # @return [Array<String>] the elements of +strings+ that match
    def filter(strings)
      compiled = regexp
      strings.select { |candidate| compiled.match?(candidate) }
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: picoglob
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Levelbrook Team
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-06-04 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: minitest
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '5.0'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '5.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '13.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '13.0'
|
|
41
|
+
description: |-
|
|
42
|
+
Picoglob turns bash-style glob patterns (*, **, ?, [...], {a,b}, {1..3}, and extglobs
|
|
43
|
+
like @(a|b), +(a|b), !(a|b)) into reusable Ruby Regexps, so you can match arbitrary
|
|
44
|
+
strings -- S3 keys, routes, log lines, branch names -- not just files on disk. It is
|
|
45
|
+
the missing Ruby counterpart to JavaScript's picomatch / minimatch. Pure Ruby, no
|
|
46
|
+
dependencies.
|
|
47
|
+
email:
|
|
48
|
+
- levelbrookteam@gmail.com
|
|
49
|
+
executables: []
|
|
50
|
+
extensions: []
|
|
51
|
+
extra_rdoc_files: []
|
|
52
|
+
files:
|
|
53
|
+
- CHANGELOG.md
|
|
54
|
+
- LICENSE
|
|
55
|
+
- README.md
|
|
56
|
+
- lib/picoglob.rb
|
|
57
|
+
- lib/picoglob/compiler.rb
|
|
58
|
+
- lib/picoglob/version.rb
|
|
59
|
+
homepage: https://consulting.levelbrook.com
|
|
60
|
+
licenses:
|
|
61
|
+
- MIT
|
|
62
|
+
metadata:
|
|
63
|
+
homepage_uri: https://consulting.levelbrook.com
|
|
64
|
+
source_code_uri: https://github.com/tachyurgy/picoglob
|
|
65
|
+
changelog_uri: https://github.com/tachyurgy/picoglob/blob/main/CHANGELOG.md
|
|
66
|
+
rubygems_mfa_required: 'true'
|
|
67
|
+
post_install_message:
|
|
68
|
+
rdoc_options: []
|
|
69
|
+
require_paths:
|
|
70
|
+
- lib
|
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: 3.0.0
|
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
|
+
requirements:
|
|
78
|
+
- - ">="
|
|
79
|
+
- !ruby/object:Gem::Version
|
|
80
|
+
version: '0'
|
|
81
|
+
requirements: []
|
|
82
|
+
rubygems_version: 3.5.22
|
|
83
|
+
signing_key:
|
|
84
|
+
specification_version: 4
|
|
85
|
+
summary: Compile bash-style glob patterns into Ruby Regexps (picomatch for Ruby).
|
|
86
|
+
test_files: []
|