gs_img_fetcher 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +13 -1
- data/exe/gs_img_fetcher +11 -1
- data/lib/gs_img_fetcher/fetcher.rb +3 -2
- data/lib/gs_img_fetcher/manager.rb +3 -2
- data/lib/gs_img_fetcher/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f61655b14e1a59cfe87809b12cbd0ba98e2886cf8bbcb7a1e15681c99d5c53d4
|
4
|
+
data.tar.gz: 319618a2329511a0c680deb8016eddcc668f9630e5d07131ebeda8440e479392
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5c1fe6b7dc0a7983ad209b0e790e647eded6799141423312dd91d2ba303f93a6e22b94a5fe81da55ca6561c7e72d5f01172005d4cdef857e82a419142710e1e
|
7
|
+
data.tar.gz: 63d3769db8e5268d81a099f9d3512ef23e5c41557defab7b012cb98b7a3157dde5353fd0061d012cd5fc5a29aea5719a59dad9d6c118efb3fa1af6c3c12301be
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -21,6 +21,18 @@ And then execute:
|
|
21
21
|
Or install it yourself as:
|
22
22
|
|
23
23
|
$ gem install gs_img_fetcher
|
24
|
+
|
25
|
+
## Features
|
26
|
+
|
27
|
+
### Concurrency
|
28
|
+
`gs_img_fetcher` is designed with concurrency in mind. It can be configured to fetch images either asynchronously or synchronously.
|
29
|
+
By default, it runs asynchronously and the maximum number of threads depends on what your machine allows.
|
30
|
+
For a relatively small input, it is better to specify the `--no-async` option.
|
31
|
+
Check out the options `--async` and `--max_threads`.
|
32
|
+
|
33
|
+
### File size limit
|
34
|
+
You can set a limit on the maximum size of each downloaded image to avoid downloading unexpectedly large files and filling up your storage.
|
35
|
+
By default, it runs without a limit. Use the `--max_size` option to set one.
|
24
36
|
|
25
37
|
## Usage
|
26
38
|
|
@@ -33,7 +45,7 @@ http://example.com/image1.jpg
|
|
33
45
|
http://example.com/image1.png
|
34
46
|
http://example.com/image1.svg
|
35
47
|
|
36
|
-
$ gs_img_fetcher run urls.txt output
|
48
|
+
$ gs_img_fetcher run urls.txt output --max_size=5
|
37
49
|
I, [2020-05-17T13:09:01.420214 #87392] INFO -- : Processing 3 URLs (3 valid, 0 invalid)
|
38
50
|
...
|
39
51
|
I, [2020-05-17T13:09:02.709097 #87392] INFO -- : Fetch complete (3 successful, 0 failed)
|
data/exe/gs_img_fetcher
CHANGED
@@ -35,9 +35,19 @@ module Commands
|
|
35
35
|
required: false,
|
36
36
|
type: :integer,
|
37
37
|
desc: 'Maximum number of threads.'
|
38
|
+
option :max_size,
|
39
|
+
required: false,
|
40
|
+
type: :integer,
|
41
|
+
desc: 'Maximum size of each downloaded image in MB.'
|
38
42
|
|
39
43
|
def call(input_path:, **opts)
|
40
|
-
|
44
|
+
puts opts
|
45
|
+
GsImgFetcher::Manager.fetch(
|
46
|
+
input_path,
|
47
|
+
max_threads: opts[:max_threads]&.to_i,
|
48
|
+
max_size: opts[:max_size]&.to_i,
|
49
|
+
**opts.slice(:output_dir, :async).compact
|
50
|
+
)
|
41
51
|
end
|
42
52
|
end
|
43
53
|
|
@@ -13,15 +13,16 @@ module GsImgFetcher
|
|
13
13
|
|
14
14
|
attr_reader :state, :uuid
|
15
15
|
|
16
|
-
def initialize(entry, output_dir)
|
16
|
+
def initialize(entry, output_dir, max_size: nil)
|
17
17
|
@entry = entry
|
18
18
|
@output_dir = output_dir
|
19
|
+
@max_size = max_size.yield_self { |s| s.present? ? s * 1024 * 1024 : nil }
|
19
20
|
@uuid = SecureRandom.uuid
|
20
21
|
@state = INITIALIZED
|
21
22
|
end
|
22
23
|
|
23
24
|
def fetch
|
24
|
-
@tempfile = Down.download(@entry.url)
|
25
|
+
@tempfile = Down.download(@entry.url, **{ max_size: @max_size }.compact)
|
25
26
|
@state = FETCHED
|
26
27
|
log_fetched
|
27
28
|
rescue Down::Error => e
|
@@ -15,11 +15,12 @@ module GsImgFetcher
|
|
15
15
|
|
16
16
|
DEFAULT_MAX_THREADS = Concurrent.processor_count
|
17
17
|
|
18
|
-
def initialize(entry_set, output_dir: nil, async: true, max_threads: nil)
|
18
|
+
def initialize(entry_set, output_dir: nil, async: true, max_threads: nil, max_size: nil)
|
19
19
|
@entry_set = entry_set
|
20
20
|
@output_dir = output_dir || Dir.pwd
|
21
21
|
@async = async
|
22
22
|
@max_threads = max_threads || DEFAULT_MAX_THREADS
|
23
|
+
@max_size = max_size
|
23
24
|
@entries = Queue.new
|
24
25
|
@successful_fetches = Queue.new
|
25
26
|
@failed_fetches = Queue.new
|
@@ -66,7 +67,7 @@ module GsImgFetcher
|
|
66
67
|
end
|
67
68
|
|
68
69
|
def fetch_one(entry, dir)
|
69
|
-
Fetcher.new(entry, dir).tap do |fetcher|
|
70
|
+
Fetcher.new(entry, dir, max_size: @max_size).tap do |fetcher|
|
70
71
|
fetcher.fetch && fetcher.save
|
71
72
|
|
72
73
|
if fetcher.successful?
|