s3grep 0.1.0 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/s3grep +43 -3
- data/lib/s3grep/directory.rb +68 -0
- data/lib/s3grep/search.rb +29 -7
- data/lib/s3grep.rb +1 -0
- data/s3grep.gemspec +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7fcbe863f65881a9cd23522655c6d0ec236029b2ba2e6ac52d89328390afbc4
|
4
|
+
data.tar.gz: f281306f058a995a25e042e9846aeeab2376aa560494fcea3e8fc91f565fc0c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5fce97df276b956fd836d2cc40d36137489a739271a41cf7923c43c6c4fd022e7dd14f6516e959a268bc9480407c92b01d7716b7302e48c1580437da22d4895a
|
7
|
+
data.tar.gz: b959b8a7f9981adcfc35f7130514de75e1c3f9e737652180e5e04623f89bcf8f8bbe4606f469f40f398c07ae8351e2cb4230b53b6402cc5cf7091c5b647f2937
|
data/bin/s3grep
CHANGED
@@ -1,10 +1,50 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'optparse'
|
3
4
|
require 's3grep'
|
5
|
+
require 'aws-sdk-s3'
|
4
6
|
|
5
|
-
|
7
|
+
options = {
|
8
|
+
ignore_case: false,
|
9
|
+
recursive: false,
|
10
|
+
file_pattern: /.*/
|
11
|
+
}
|
12
|
+
OptionParser.new do |opts|
|
13
|
+
opts.banner = 'Usage: s3grep [options]'
|
14
|
+
|
15
|
+
opts.on('-i', '--ignore-case', 'Ignore case') do
|
16
|
+
options[:ignore_case] = true
|
17
|
+
end
|
18
|
+
|
19
|
+
opts.on('-r', '--recursive', 'Search for file in folder') do
|
20
|
+
options[:recursive] = true
|
21
|
+
end
|
22
|
+
|
23
|
+
opts.on('--include FILE_PATTERN', 'Include matching files') do |v|
|
24
|
+
options[:file_pattern] = Regexp.new(v, Regexp::IGNORECASE)
|
25
|
+
end
|
26
|
+
end.parse!
|
27
|
+
|
28
|
+
regex_options =
|
29
|
+
if options[:ignore_case]
|
30
|
+
Regexp::IGNORECASE
|
31
|
+
else
|
32
|
+
0
|
33
|
+
end
|
34
|
+
|
35
|
+
regex = Regexp.new(ARGV[0], regex_options)
|
6
36
|
s3_url = ARGV[1]
|
7
37
|
|
8
|
-
|
9
|
-
|
38
|
+
aws_s3_client = Aws::S3::Client.new
|
39
|
+
|
40
|
+
if options[:recursive]
|
41
|
+
S3Grep::Directory.glob(s3_url, aws_s3_client, options[:file_pattern]) do |s3_file|
|
42
|
+
S3Grep::Search.search(s3_file, aws_s3_client, regex) do |line_number, line|
|
43
|
+
puts "#{s3_file}:#{line_number} #{line}"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
else
|
47
|
+
S3Grep::Search.search(s3_url, aws_s3_client, regex) do |line_number, line|
|
48
|
+
puts "#{s3_url}:#{line_number} #{line}"
|
49
|
+
end
|
10
50
|
end
|
data/lib/s3grep/directory.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'aws-sdk-s3'
|
2
|
+
require 'cgi'
|
3
|
+
|
4
|
+
# Purpose search through a directory on S3 for a specified file pattern
|
5
|
+
module S3Grep
|
6
|
+
class Directory
|
7
|
+
attr_reader :s3_url,
|
8
|
+
:aws_s3_client
|
9
|
+
|
10
|
+
def initialize(s3_url, aws_s3_client)
|
11
|
+
@s3_url = s3_url
|
12
|
+
@aws_s3_client = aws_s3_client
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.glob(s3_url, aws_s3_client, regex, &block)
|
16
|
+
new(s3_url, aws_s3_client).glob(regex, &block)
|
17
|
+
end
|
18
|
+
|
19
|
+
def glob(regex)
|
20
|
+
each do |s3_file|
|
21
|
+
next unless s3_file.match?(regex)
|
22
|
+
|
23
|
+
yield s3_file
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def each
|
28
|
+
uri = URI(s3_url)
|
29
|
+
|
30
|
+
max_keys = 1_000
|
31
|
+
|
32
|
+
prefix = CGI.unescape(uri.path[1..-1] || '')
|
33
|
+
|
34
|
+
resp = aws_s3_client.list_objects(
|
35
|
+
{
|
36
|
+
bucket: uri.host,
|
37
|
+
prefix: prefix,
|
38
|
+
max_keys: max_keys
|
39
|
+
}
|
40
|
+
)
|
41
|
+
|
42
|
+
resp.contents.each do |content|
|
43
|
+
yield('s3://' + uri.host + '/' + escape_path(content.key))
|
44
|
+
end
|
45
|
+
|
46
|
+
while resp.contents.size == max_keys
|
47
|
+
marker = resp.contents.last.key
|
48
|
+
|
49
|
+
resp = aws_s3_client.list_objects(
|
50
|
+
{
|
51
|
+
bucket: uri.host,
|
52
|
+
prefix: prefix,
|
53
|
+
max_keys: max_keys,
|
54
|
+
marker: marker
|
55
|
+
}
|
56
|
+
)
|
57
|
+
|
58
|
+
resp.contents.each do |content|
|
59
|
+
yield('s3://' + uri.host + '/' + escape_path(content.key))
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def escape_path(s3_path)
|
65
|
+
s3_path.split('/').map { |part| CGI.escape(part) }.join('/')
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
data/lib/s3grep/search.rb
CHANGED
@@ -1,24 +1,32 @@
|
|
1
1
|
require 'aws-sdk-s3'
|
2
|
+
require 'cgi'
|
2
3
|
|
3
4
|
module S3Grep
|
4
5
|
class Search
|
5
6
|
attr_reader :s3_url,
|
6
|
-
:aws_s3_client
|
7
|
+
:aws_s3_client,
|
8
|
+
:compression
|
7
9
|
|
8
|
-
def initialize(s3_url, aws_s3_client)
|
10
|
+
def initialize(s3_url, aws_s3_client, compression = nil)
|
9
11
|
@s3_url = s3_url
|
10
12
|
@aws_s3_client = aws_s3_client
|
13
|
+
@compression = compression
|
11
14
|
end
|
12
15
|
|
13
16
|
def self.search(s3_url, aws_s3_client, regex, &block)
|
14
|
-
new(s3_url, aws_s3_client).search(regex, &block)
|
17
|
+
new(s3_url, aws_s3_client, detect_compression(s3_url)).search(regex, &block)
|
15
18
|
end
|
16
19
|
|
17
|
-
def
|
18
|
-
|
20
|
+
def self.detect_compression(s3_url)
|
21
|
+
return :gzip if s3_url =~ /\.gz$/i
|
22
|
+
return :zip if s3_url =~ /\.zip$/i
|
19
23
|
|
24
|
+
nil
|
25
|
+
end
|
26
|
+
|
27
|
+
def search(regex)
|
20
28
|
line_number = 0
|
21
|
-
|
29
|
+
to_io.each do |line|
|
22
30
|
line_number += 1
|
23
31
|
next unless line.match?(regex)
|
24
32
|
|
@@ -32,9 +40,23 @@ module S3Grep
|
|
32
40
|
aws_s3_client.get_object(
|
33
41
|
{
|
34
42
|
bucket: uri.host,
|
35
|
-
key: uri.path[1..-1]
|
43
|
+
key: CGI.unescape(uri.path[1..-1])
|
36
44
|
}
|
37
45
|
)
|
38
46
|
end
|
47
|
+
|
48
|
+
def to_io
|
49
|
+
body = s3_object.body
|
50
|
+
|
51
|
+
if compression == :gzip
|
52
|
+
Zlib::GzipReader.new(body)
|
53
|
+
elsif compression == :zip
|
54
|
+
require 'zip'
|
55
|
+
zip = Zip::File.open_buffer(body)
|
56
|
+
zip.entries.first.get_input_stream
|
57
|
+
else
|
58
|
+
body
|
59
|
+
end
|
60
|
+
end
|
39
61
|
end
|
40
62
|
end
|
data/lib/s3grep.rb
CHANGED
data/s3grep.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: s3grep
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-s3
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- README.md
|
41
41
|
- bin/s3grep
|
42
42
|
- lib/s3grep.rb
|
43
|
+
- lib/s3grep/directory.rb
|
43
44
|
- lib/s3grep/search.rb
|
44
45
|
- s3grep.gemspec
|
45
46
|
- script/console
|