s3grep 0.1.0 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7155353e97cda7850ee2e53dddd2d03e0951b426a9c3fec3408b8f2b2f91b7bb
4
- data.tar.gz: 6debc36f79d05e27787ffd5eff62373c51e2d9e314b8eb71a7b06b681bbfea6d
3
+ metadata.gz: b7fcbe863f65881a9cd23522655c6d0ec236029b2ba2e6ac52d89328390afbc4
4
+ data.tar.gz: f281306f058a995a25e042e9846aeeab2376aa560494fcea3e8fc91f565fc0c1
5
5
  SHA512:
6
- metadata.gz: 06a4120d12ac5dc6aaaf13b14cb08fe19a7ba2594e193920ea9c9d2376896049f6c61cc2976a0968126b00ee78a3750e7645a315f9d307428ac2629ef57ba8e6
7
- data.tar.gz: a5d0c0d85be34326b73517cdfc9ef0106e1c068665b02ca9bd64719ba60aae7c1c94ac67930bd05e0bc1970e9130b18306a2d84540e717665f4dd8935859d41b
6
+ metadata.gz: 5fce97df276b956fd836d2cc40d36137489a739271a41cf7923c43c6c4fd022e7dd14f6516e959a268bc9480407c92b01d7716b7302e48c1580437da22d4895a
7
+ data.tar.gz: b959b8a7f9981adcfc35f7130514de75e1c3f9e737652180e5e04623f89bcf8f8bbe4606f469f40f398c07ae8351e2cb4230b53b6402cc5cf7091c5b647f2937
data/bin/s3grep CHANGED
@@ -1,10 +1,50 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
3
4
  require 's3grep'
5
+ require 'aws-sdk-s3'
4
6
 
5
- regex = Regexp.new(ARGV[0])
7
+ options = {
8
+ ignore_case: false,
9
+ recursive: false,
10
+ file_pattern: /.*/
11
+ }
12
+ OptionParser.new do |opts|
13
+ opts.banner = 'Usage: s3grep [options]'
14
+
15
+ opts.on('-i', '--ignore-case', 'Ignore case') do
16
+ options[:ignore_case] = true
17
+ end
18
+
19
+ opts.on('-r', '--recursive', 'Search for file in folder') do
20
+ options[:recursive] = true
21
+ end
22
+
23
+ opts.on('--include FILE_PATTERN', 'Include matching files') do |v|
24
+ options[:file_pattern] = Regexp.new(v, Regexp::IGNORECASE)
25
+ end
26
+ end.parse!
27
+
28
+ regex_options =
29
+ if options[:ignore_case]
30
+ Regexp::IGNORECASE
31
+ else
32
+ 0
33
+ end
34
+
35
+ regex = Regexp.new(ARGV[0], regex_options)
6
36
  s3_url = ARGV[1]
7
37
 
8
- S3Grep::Search.search(s3_url, Aws::S3::Client.new, regex) do |line_number, line|
9
- puts "#{s3_url}:#{line_number} #{line}"
38
+ aws_s3_client = Aws::S3::Client.new
39
+
40
+ if options[:recursive]
41
+ S3Grep::Directory.glob(s3_url, aws_s3_client, options[:file_pattern]) do |s3_file|
42
+ S3Grep::Search.search(s3_file, aws_s3_client, regex) do |line_number, line|
43
+ puts "#{s3_file}:#{line_number} #{line}"
44
+ end
45
+ end
46
+ else
47
+ S3Grep::Search.search(s3_url, aws_s3_client, regex) do |line_number, line|
48
+ puts "#{s3_url}:#{line_number} #{line}"
49
+ end
10
50
  end
data/lib/s3grep/directory.rb ADDED
@@ -0,0 +1,68 @@
1
+ require 'aws-sdk-s3'
2
+ require 'cgi'
3
+
4
+ # Purpose: search through a directory on S3 for files matching a specified pattern
5
+ module S3Grep
6
+ class Directory
7
+ attr_reader :s3_url,
8
+ :aws_s3_client
9
+
10
+ def initialize(s3_url, aws_s3_client)
11
+ @s3_url = s3_url
12
+ @aws_s3_client = aws_s3_client
13
+ end
14
+
15
+ def self.glob(s3_url, aws_s3_client, regex, &block)
16
+ new(s3_url, aws_s3_client).glob(regex, &block)
17
+ end
18
+
19
+ def glob(regex)
20
+ each do |s3_file|
21
+ next unless s3_file.match?(regex)
22
+
23
+ yield s3_file
24
+ end
25
+ end
26
+
27
+ def each
28
+ uri = URI(s3_url)
29
+
30
+ max_keys = 1_000
31
+
32
+ prefix = CGI.unescape(uri.path[1..-1] || '')
33
+
34
+ resp = aws_s3_client.list_objects(
35
+ {
36
+ bucket: uri.host,
37
+ prefix: prefix,
38
+ max_keys: max_keys
39
+ }
40
+ )
41
+
42
+ resp.contents.each do |content|
43
+ yield('s3://' + uri.host + '/' + escape_path(content.key))
44
+ end
45
+
46
+ while resp.contents.size == max_keys
47
+ marker = resp.contents.last.key
48
+
49
+ resp = aws_s3_client.list_objects(
50
+ {
51
+ bucket: uri.host,
52
+ prefix: prefix,
53
+ max_keys: max_keys,
54
+ marker: marker
55
+ }
56
+ )
57
+
58
+ resp.contents.each do |content|
59
+ yield('s3://' + uri.host + '/' + escape_path(content.key))
60
+ end
61
+ end
62
+ end
63
+
64
+ def escape_path(s3_path)
65
+ s3_path.split('/').map { |part| CGI.escape(part) }.join('/')
66
+ end
67
+ end
68
+ end
data/lib/s3grep/search.rb CHANGED
@@ -1,24 +1,32 @@
1
1
  require 'aws-sdk-s3'
2
+ require 'cgi'
2
3
 
3
4
  module S3Grep
4
5
  class Search
5
6
  attr_reader :s3_url,
6
- :aws_s3_client
7
+ :aws_s3_client,
8
+ :compression
7
9
 
8
- def initialize(s3_url, aws_s3_client)
10
+ def initialize(s3_url, aws_s3_client, compression = nil)
9
11
  @s3_url = s3_url
10
12
  @aws_s3_client = aws_s3_client
13
+ @compression = compression
11
14
  end
12
15
 
13
16
  def self.search(s3_url, aws_s3_client, regex, &block)
14
- new(s3_url, aws_s3_client).search(regex, &block)
17
+ new(s3_url, aws_s3_client, detect_compression(s3_url)).search(regex, &block)
15
18
  end
16
19
 
17
- def search(regex)
18
- body = s3_object.body
20
+ def self.detect_compression(s3_url)
21
+ return :gzip if s3_url =~ /\.gz$/i
22
+ return :zip if s3_url =~ /\.zip$/i
19
23
 
24
+ nil
25
+ end
26
+
27
+ def search(regex)
20
28
  line_number = 0
21
- body.each do |line|
29
+ to_io.each do |line|
22
30
  line_number += 1
23
31
  next unless line.match?(regex)
24
32
 
@@ -32,9 +40,23 @@ module S3Grep
32
40
  aws_s3_client.get_object(
33
41
  {
34
42
  bucket: uri.host,
35
- key: uri.path[1..-1]
43
+ key: CGI.unescape(uri.path[1..-1])
36
44
  }
37
45
  )
38
46
  end
47
+
48
+ def to_io
49
+ body = s3_object.body
50
+
51
+ if compression == :gzip
52
+ Zlib::GzipReader.new(body)
53
+ elsif compression == :zip
54
+ require 'zip'
55
+ zip = Zip::File.open_buffer(body)
56
+ zip.entries.first.get_input_stream
57
+ else
58
+ body
59
+ end
60
+ end
39
61
  end
40
62
  end
data/lib/s3grep.rb CHANGED
@@ -1,3 +1,4 @@
1
1
  module S3Grep
2
+ autoload :Directory, 's3grep/directory'
2
3
  autoload :Search, 's3grep/search'
3
4
  end
data/s3grep.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 's3grep'
5
- s.version = '0.1.0'
5
+ s.version = '0.1.3'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'Search through S3 files'
8
8
  s.description = 'Tools for searching files on S3'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: s3grep
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-29 00:00:00.000000000 Z
11
+ date: 2022-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-s3
@@ -40,6 +40,7 @@ files:
40
40
  - README.md
41
41
  - bin/s3grep
42
42
  - lib/s3grep.rb
43
+ - lib/s3grep/directory.rb
43
44
  - lib/s3grep/search.rb
44
45
  - s3grep.gemspec
45
46
  - script/console