gs_img_fetcher 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +13 -1
- data/exe/gs_img_fetcher +11 -1
- data/lib/gs_img_fetcher/fetcher.rb +3 -2
- data/lib/gs_img_fetcher/manager.rb +3 -2
- data/lib/gs_img_fetcher/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f61655b14e1a59cfe87809b12cbd0ba98e2886cf8bbcb7a1e15681c99d5c53d4
|
4
|
+
data.tar.gz: 319618a2329511a0c680deb8016eddcc668f9630e5d07131ebeda8440e479392
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5c1fe6b7dc0a7983ad209b0e790e647eded6799141423312dd91d2ba303f93a6e22b94a5fe81da55ca6561c7e72d5f01172005d4cdef857e82a419142710e1e
|
7
|
+
data.tar.gz: 63d3769db8e5268d81a099f9d3512ef23e5c41557defab7b012cb98b7a3157dde5353fd0061d012cd5fc5a29aea5719a59dad9d6c118efb3fa1af6c3c12301be
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -21,6 +21,18 @@ And then execute:
|
|
21
21
|
Or install it yourself as:
|
22
22
|
|
23
23
|
$ gem install gs_img_fetcher
|
24
|
+
|
25
|
+
## Features
|
26
|
+
|
27
|
+
### Concurrency
|
28
|
+
`gs_img_fetcher` is designed with concurrency in mind. It can be configured to fetch images either asynchronously or synchronously.
|
29
|
+
By default, it runs asynchronously, and the maximum number of threads defaults to the number of processors on your machine.
|
30
|
+
For a relatively small input, it would be better to specify the `--no-async` option.
|
31
|
+
Check out the options `--async` and `--max_threads`.
|
32
|
+
|
33
|
+
### File size limit
|
34
|
+
You can set a limit on the maximum size of each downloaded image to avoid downloading unexpectedly large files and filling up your storage.
|
35
|
+
By default, it runs without a limit. Use the `--max_size` option to set one (in MB).
|
24
36
|
|
25
37
|
## Usage
|
26
38
|
|
@@ -33,7 +45,7 @@ http://example.com/image1.jpg
|
|
33
45
|
http://example.com/image1.png
|
34
46
|
http://example.com/image1.svg
|
35
47
|
|
36
|
-
$ gs_img_fetcher run urls.txt output
|
48
|
+
$ gs_img_fetcher run urls.txt output --max_size=5
|
37
49
|
I, [2020-05-17T13:09:01.420214 #87392] INFO -- : Processing 3 URLs (3 valid, 0 invalid)
|
38
50
|
...
|
39
51
|
I, [2020-05-17T13:09:02.709097 #87392] INFO -- : Fetch complete (3 successful, 0 failed)
|
data/exe/gs_img_fetcher
CHANGED
@@ -35,9 +35,19 @@ module Commands
|
|
35
35
|
required: false,
|
36
36
|
type: :integer,
|
37
37
|
desc: 'Maximum number of threads.'
|
38
|
+
option :max_size,
|
39
|
+
required: false,
|
40
|
+
type: :integer,
|
41
|
+
desc: 'Maximum size of each downloaded image in MB.'
|
38
42
|
|
39
43
|
def call(input_path:, **opts)
|
40
|
-
|
44
|
+
puts opts
|
45
|
+
GsImgFetcher::Manager.fetch(
|
46
|
+
input_path,
|
47
|
+
max_threads: opts[:max_threads]&.to_i,
|
48
|
+
max_size: opts[:max_size]&.to_i,
|
49
|
+
**opts.slice(:output_dir, :async).compact
|
50
|
+
)
|
41
51
|
end
|
42
52
|
end
|
43
53
|
|
@@ -13,15 +13,16 @@ module GsImgFetcher
|
|
13
13
|
|
14
14
|
attr_reader :state, :uuid
|
15
15
|
|
16
|
-
def initialize(entry, output_dir)
|
16
|
+
def initialize(entry, output_dir, max_size: nil)
|
17
17
|
@entry = entry
|
18
18
|
@output_dir = output_dir
|
19
|
+
@max_size = max_size.yield_self { |s| s.present? ? s * 1024 * 1024 : nil }
|
19
20
|
@uuid = SecureRandom.uuid
|
20
21
|
@state = INITIALIZED
|
21
22
|
end
|
22
23
|
|
23
24
|
def fetch
|
24
|
-
@tempfile = Down.download(@entry.url)
|
25
|
+
@tempfile = Down.download(@entry.url, **{ max_size: @max_size }.compact)
|
25
26
|
@state = FETCHED
|
26
27
|
log_fetched
|
27
28
|
rescue Down::Error => e
|
@@ -15,11 +15,12 @@ module GsImgFetcher
|
|
15
15
|
|
16
16
|
DEFAULT_MAX_THREADS = Concurrent.processor_count
|
17
17
|
|
18
|
-
def initialize(entry_set, output_dir: nil, async: true, max_threads: nil)
|
18
|
+
def initialize(entry_set, output_dir: nil, async: true, max_threads: nil, max_size: nil)
|
19
19
|
@entry_set = entry_set
|
20
20
|
@output_dir = output_dir || Dir.pwd
|
21
21
|
@async = async
|
22
22
|
@max_threads = max_threads || DEFAULT_MAX_THREADS
|
23
|
+
@max_size = max_size
|
23
24
|
@entries = Queue.new
|
24
25
|
@successful_fetches = Queue.new
|
25
26
|
@failed_fetches = Queue.new
|
@@ -66,7 +67,7 @@ module GsImgFetcher
|
|
66
67
|
end
|
67
68
|
|
68
69
|
def fetch_one(entry, dir)
|
69
|
-
Fetcher.new(entry, dir).tap do |fetcher|
|
70
|
+
Fetcher.new(entry, dir, max_size: @max_size).tap do |fetcher|
|
70
71
|
fetcher.fetch && fetcher.save
|
71
72
|
|
72
73
|
if fetcher.successful?
|