s3grep 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7155353e97cda7850ee2e53dddd2d03e0951b426a9c3fec3408b8f2b2f91b7bb
4
- data.tar.gz: 6debc36f79d05e27787ffd5eff62373c51e2d9e314b8eb71a7b06b681bbfea6d
3
+ metadata.gz: b7fcbe863f65881a9cd23522655c6d0ec236029b2ba2e6ac52d89328390afbc4
4
+ data.tar.gz: f281306f058a995a25e042e9846aeeab2376aa560494fcea3e8fc91f565fc0c1
5
5
  SHA512:
6
- metadata.gz: 06a4120d12ac5dc6aaaf13b14cb08fe19a7ba2594e193920ea9c9d2376896049f6c61cc2976a0968126b00ee78a3750e7645a315f9d307428ac2629ef57ba8e6
7
- data.tar.gz: a5d0c0d85be34326b73517cdfc9ef0106e1c068665b02ca9bd64719ba60aae7c1c94ac67930bd05e0bc1970e9130b18306a2d84540e717665f4dd8935859d41b
6
+ metadata.gz: 5fce97df276b956fd836d2cc40d36137489a739271a41cf7923c43c6c4fd022e7dd14f6516e959a268bc9480407c92b01d7716b7302e48c1580437da22d4895a
7
+ data.tar.gz: b959b8a7f9981adcfc35f7130514de75e1c3f9e737652180e5e04623f89bcf8f8bbe4606f469f40f398c07ae8351e2cb4230b53b6402cc5cf7091c5b647f2937
data/bin/s3grep CHANGED
@@ -1,10 +1,50 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
3
4
  require 's3grep'
5
+ require 'aws-sdk-s3'
4
6
 
5
- regex = Regexp.new(ARGV[0])
7
# Command-line entry point: s3grep [options] PATTERN S3_URL
#
# Prints every line of the S3 object at S3_URL that matches PATTERN, as
# "<url>:<line number> <line>". With --recursive, S3_URL is treated as a
# prefix and every object under it whose URL matches --include is searched.
options = {
  ignore_case: false,
  recursive: false,
  file_pattern: /.*/
}

parser = OptionParser.new do |opts|
  opts.banner = 'Usage: s3grep [options] PATTERN S3_URL'

  opts.on('-i', '--ignore-case', 'Ignore case') do
    options[:ignore_case] = true
  end

  opts.on('-r', '--recursive', 'Search for file in folder') do
    options[:recursive] = true
  end

  opts.on('--include FILE_PATTERN', 'Include matching files') do |v|
    options[:file_pattern] = Regexp.new(v, Regexp::IGNORECASE)
  end
end
parser.parse!

# parse! strips the recognised switches, leaving only the positional arguments.
pattern, s3_url = ARGV

# Without both positionals, Regexp.new(nil) would fail with a bare TypeError;
# show the usage text and exit non-zero instead.
abort parser.help unless pattern && s3_url

regex = Regexp.new(pattern, options[:ignore_case] ? Regexp::IGNORECASE : 0)

aws_s3_client = Aws::S3::Client.new

# Searches one object and prints each matching line prefixed by its location.
search_file = lambda do |file_url|
  S3Grep::Search.search(file_url, aws_s3_client, regex) do |line_number, line|
    puts "#{file_url}:#{line_number} #{line}"
  end
end

if options[:recursive]
  S3Grep::Directory.glob(s3_url, aws_s3_client, options[:file_pattern], &search_file)
else
  search_file.call(s3_url)
end
@@ -0,0 +1,68 @@
1
+ require 'aws-sdk-s3'
2
+ require 'cgi'
3
+
4
# Purpose: search through a "directory" (key prefix) on S3 for objects whose
# URL matches a specified file pattern.
module S3Grep
  class Directory
    # Page size requested from S3 on each list_objects call.
    MAX_KEYS = 1_000

    attr_reader :s3_url,
                :aws_s3_client

    # @param s3_url [String] s3://bucket/prefix URL to list under
    # @param aws_s3_client [#list_objects] S3 client used for the listing
    def initialize(s3_url, aws_s3_client)
      @s3_url = s3_url
      @aws_s3_client = aws_s3_client
    end

    # Shorthand for Directory.new(s3_url, aws_s3_client).glob(regex, &block).
    def self.glob(s3_url, aws_s3_client, regex, &block)
      new(s3_url, aws_s3_client).glob(regex, &block)
    end

    # Yields the URL of every listed object that matches +regex+. The pattern
    # is tested against the full, escaped s3:// URL produced by #each.
    # Returns an Enumerator when called without a block.
    def glob(regex)
      return enum_for(:glob, regex) unless block_given?

      each do |s3_file|
        yield s3_file if s3_file.match?(regex)
      end
    end

    # Yields an s3:// URL for every object under the prefix, fetching further
    # pages for as long as S3 reports the listing as truncated.
    # Returns an Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?

      uri = URI(s3_url)
      request = {
        bucket: uri.host,
        # A URL with no path ("s3://bucket") lists the whole bucket.
        prefix: CGI.unescape(uri.path[1..-1] || ''),
        max_keys: MAX_KEYS
      }

      loop do
        resp = aws_s3_client.list_objects(request)

        resp.contents.each do |content|
          yield("s3://#{uri.host}/#{escape_path(content.key)}")
        end

        # is_truncated is the service's own "more results" flag; a short page
        # does not by itself mean the listing is finished. The empty check
        # guards against looping forever on a truncated page with no keys.
        break unless resp.is_truncated && !resp.contents.empty?

        # The last key returned is the marker for the next page.
        request = request.merge(marker: resp.contents.last.key)
      end
    end

    # CGI-escapes each segment of a key while keeping the "/" separators.
    # The -1 limit keeps trailing empty segments, so a key ending in "/"
    # keeps its trailing slash.
    def escape_path(s3_path)
      s3_path.split('/', -1).map { |part| CGI.escape(part) }.join('/')
    end
  end
end
data/lib/s3grep/search.rb CHANGED
@@ -1,24 +1,32 @@
1
1
  require 'aws-sdk-s3'
2
+ require 'cgi'
2
3
 
3
4
  module S3Grep
4
5
  class Search
5
6
  attr_reader :s3_url,
6
- :aws_s3_client
7
+ :aws_s3_client,
8
+ :compression
7
9
 
8
- def initialize(s3_url, aws_s3_client)
10
# Remembers the object's s3:// URL, the client used to fetch it, and an
# optional compression marker (nil when none is given).
def initialize(s3_url, aws_s3_client, compression = nil)
  @s3_url, @aws_s3_client, @compression = s3_url, aws_s3_client, compression
end
12
15
 
13
16
# Convenience entry point: builds a Search for s3_url, using the compression
# reported by detect_compression, and runs regex over it, forwarding the block.
def self.search(s3_url, aws_s3_client, regex, &block)
  searcher = new(s3_url, aws_s3_client, detect_compression(s3_url))
  searcher.search(regex, &block)
end
16
19
 
17
- def search(regex)
18
- body = s3_object.body
20
# Infers the compression format from the extension at the end of an S3 URL.
#
# @param s3_url [String, nil] URL of the object
# @return [Symbol, nil] :gzip for ".gz", :zip for ".zip" (case-insensitive),
#   nil for anything else
def self.detect_compression(s3_url)
  # \z anchors at the real end of the string; `$` would also match just
  # before a newline embedded in the URL.
  case s3_url
  when /\.gz\z/i then :gzip
  when /\.zip\z/i then :zip
  end
end
26
+
27
+ def search(regex)
20
28
  line_number = 0
21
- body.each do |line|
29
+ to_io.each do |line|
22
30
  line_number += 1
23
31
  next unless line.match?(regex)
24
32
 
@@ -32,9 +40,23 @@ module S3Grep
32
40
  aws_s3_client.get_object(
33
41
  {
34
42
  bucket: uri.host,
35
- key: uri.path[1..-1]
43
+ key: CGI.unescape(uri.path[1..-1])
36
44
  }
37
45
  )
38
46
  end
47
+
48
# Returns an IO-like object for reading the S3 object's content, wrapping the
# response body in a decompressing reader when +compression+ asks for one.
#
# @return [#each] the raw body, a Zlib::GzipReader, or the input stream of
#   the first entry of a zip archive
# @raise [ArgumentError] if the zip archive contains no entries
def to_io
  body = s3_object.body

  case compression
  when :gzip
    # Required here so the method does not depend on some other library
    # having already loaded zlib.
    require 'zlib'
    Zlib::GzipReader.new(body)
  when :zip
    # Loaded lazily, presumably so rubyzip is only needed when a .zip object
    # is actually searched.
    require 'zip'
    entry = Zip::File.open_buffer(body).entries.first
    raise ArgumentError, "#{s3_url} is an empty zip archive" unless entry

    # Only the first entry of the archive is searched.
    entry.get_input_stream
  else
    body
  end
end
39
61
  end
40
62
  end
data/lib/s3grep.rb CHANGED
@@ -1,3 +1,4 @@
1
1
  module S3Grep
2
+ autoload :Directory, 's3grep/directory'
2
3
  autoload :Search, 's3grep/search'
3
4
  end
data/s3grep.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 's3grep'
5
- s.version = '0.1.0'
5
+ s.version = '0.1.3'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'Search through S3 files'
8
8
  s.description = 'Tools for searching files on S3'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: s3grep
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-29 00:00:00.000000000 Z
11
+ date: 2022-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-s3
@@ -40,6 +40,7 @@ files:
40
40
  - README.md
41
41
  - bin/s3grep
42
42
  - lib/s3grep.rb
43
+ - lib/s3grep/directory.rb
43
44
  - lib/s3grep/search.rb
44
45
  - s3grep.gemspec
45
46
  - script/console