gs_img_fetcher 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -3
- data/Gemfile.lock +1 -1
- data/README.md +45 -7
- data/lib/gs_img_fetcher/input.rb +12 -7
- data/lib/gs_img_fetcher/manager.rb +7 -10
- data/lib/gs_img_fetcher/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9349da7d358fea7bb7ae7bb75472a396cd6810e3348f1ce09ad199a5e47ad7e
|
4
|
+
data.tar.gz: eeb7ef868ad8f4f7c1dbc5a260880a83c8fd1baca676b1994d05546af5751fe7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74dd2fe6d6d39ed3fb9f485c163d28121fcef11baa708d8ed2e0f713ed39d310efdd2db725e468ea7440e8615cad175bc52a20d31cdd60e17d954d06489f4cc1
|
7
|
+
data.tar.gz: 1376fb3f38b62c1cf2e336efa6596af1d29b861368a6ae185a18b706e52cc7e1606b3649d9f04c75d8cb650f5e8bcb08c46c5d1dfd9ffed63967f06e3b7f3838
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,8 @@
|
|
1
|
-
# GsImgFetcher
|
1
|
+
# 🖼 GsImgFetcher
|
2
|
+
|
3
|
+
[Build Status](https://travis-ci.org/AkihikoITOH/gs_img_fetcher)
|
4
|
+
[Gem Version](https://badge.fury.io/rb/gs_img_fetcher)
|
5
|
+
[Maintainability](https://codeclimate.com/github/AkihikoITOH/gs_img_fetcher/maintainability)
|
2
6
|
|
3
7
|
`gs_img_fetcher` is a tool to download images from remote hosts and save them on your local storage.
|
4
8
|
|
@@ -20,20 +24,54 @@ Or install it yourself as:
|
|
20
24
|
|
21
25
|
## Usage
|
22
26
|
|
27
|
+
### CLI
|
23
28
|
Let's say you have in your current directory a text file named `urls.txt` containing a list of image URLs, one URL per line.
|
24
29
|
|
25
|
-
|
30
|
+
```sh
|
31
|
+
$ cat urls.txt
|
32
|
+
http://example.com/image1.jpg
|
33
|
+
http://example.com/image1.png
|
34
|
+
http://example.com/image1.svg
|
35
|
+
|
36
|
+
$ gs_img_fetcher run urls.txt output
|
37
|
+
I, [2020-05-17T13:09:01.420214 #87392] INFO -- : Processing 3 URLs (3 valid, 0 invalid)
|
38
|
+
...
|
39
|
+
I, [2020-05-17T13:09:02.709097 #87392] INFO -- : Fetch complete (3 successful, 0 failed)
|
40
|
+
|
41
|
+
$ ls output
|
42
|
+
1e8256aa-5cb7-4545-9109-65aaa550deac.jpg 49d4f436-110f-4206-a2d6-07cc6156fc56.png a5b4ce07-1fc3-49e3-b558-44f8c4afaaab.svg
|
43
|
+
```
|
44
|
+
|
45
|
+
Running `gs_img_fetcher run urls.txt output` takes the URLs from `urls.txt`, downloads the images, and saves them in the directory `output`.
|
26
46
|
|
27
47
|
Run `gs_img_fetcher --help` to show usage guide.
|
28
48
|
|
29
49
|
Set the environment variable `NOLOG` to a truthy value to suppress logs.
|
30
50
|
|
31
|
-
|
51
|
+
### Hooking GsImgFetcher into your own application
|
32
52
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
53
|
+
#### Fetching a single image
|
54
|
+
```ruby
|
55
|
+
fetcher = GsImgFetcher::Fetcher.new(
|
56
|
+
GsImgFetcher::InputEntry.new('http://example.com/image.png'),
|
57
|
+
'output'
|
58
|
+
)
|
59
|
+
fetcher.fetch
|
60
|
+
fetcher.save
|
61
|
+
fetcher.successful?
|
62
|
+
```
|
63
|
+
|
64
|
+
#### Fetching multiple images
|
65
|
+
```ruby
|
66
|
+
input = GsImgFetcher::Input.from_file('urls.txt')
|
67
|
+
# or
|
68
|
+
urls = ['http://example.com/image.png', 'http://example.com/image2.png']
|
69
|
+
entries = urls.map { |url| GsImgFetcher::InputEntry.new(url) }
|
70
|
+
input = GsImgFetcher::Input.new (entries)
|
71
|
+
|
72
|
+
manager = GsImgFetcher::Manager.new(input, output_dir: 'output', async: false)
|
73
|
+
manager.setup.fetch
|
74
|
+
```
|
37
75
|
|
38
76
|
- `Manager` is what controls the entire process of handling the input and fetching and saving the images.
|
39
77
|
- `Input` is responsible for finding the input file and parsing, sanitizing and validating the list of URLs.
|
data/lib/gs_img_fetcher/input.rb
CHANGED
@@ -4,15 +4,20 @@ require 'active_support/core_ext/object/blank'
|
|
4
4
|
|
5
5
|
module GsImgFetcher
|
6
6
|
class Input
|
7
|
-
|
8
|
-
|
7
|
+
class << self
|
8
|
+
def from_file(path)
|
9
|
+
entries = File.open(path, 'r') { |f| f.each_line.map(&:strip).map(&:presence) }
|
10
|
+
.compact
|
11
|
+
.uniq
|
12
|
+
.map { |url| InputEntry.new(url) }
|
13
|
+
new(entries)
|
14
|
+
end
|
9
15
|
end
|
10
16
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
.map { |url| InputEntry.new(url) }
|
17
|
+
attr_reader :entries
|
18
|
+
|
19
|
+
def initialize(entries)
|
20
|
+
@entries = entries
|
16
21
|
end
|
17
22
|
|
18
23
|
def valid_entries
|
data/lib/gs_img_fetcher/manager.rb
CHANGED
@@ -5,17 +5,18 @@ require 'concurrent'
|
|
5
5
|
module GsImgFetcher
|
6
6
|
class Manager
|
7
7
|
class << self
|
8
|
-
def fetch(
|
9
|
-
|
8
|
+
def fetch(input_path, **opts)
|
9
|
+
input = Input.from_file(input_path)
|
10
|
+
new(input, **opts).setup.fetch
|
10
11
|
end
|
11
12
|
end
|
12
13
|
|
13
|
-
attr_reader :entries, :successful_fetches, :failed_fetches
|
14
|
+
attr_reader :input, :entries, :successful_fetches, :failed_fetches
|
14
15
|
|
15
16
|
DEFAULT_MAX_THREADS = Concurrent.processor_count
|
16
17
|
|
17
|
-
def initialize(
|
18
|
-
@
|
18
|
+
def initialize(input, output_dir: nil, async: true, max_threads: nil)
|
19
|
+
@input = input
|
19
20
|
@output_dir = output_dir || Dir.pwd
|
20
21
|
@async = async
|
21
22
|
@max_threads = max_threads || DEFAULT_MAX_THREADS
|
@@ -24,10 +25,6 @@ module GsImgFetcher
|
|
24
25
|
@failed_fetches = Queue.new
|
25
26
|
end
|
26
27
|
|
27
|
-
def input
|
28
|
-
@input ||= Input.new(@input_path)
|
29
|
-
end
|
30
|
-
|
31
28
|
def setup
|
32
29
|
tap do
|
33
30
|
log_entries
|
@@ -81,7 +78,7 @@ module GsImgFetcher
|
|
81
78
|
end
|
82
79
|
|
83
80
|
def log_entries
|
84
|
-
LOGGER.info("Processing #{input.entries.count} URLs (#{input.valid_entries.count} valid, #{input.invalid_entries.count} invalid)
|
81
|
+
LOGGER.info("Processing #{input.entries.count} URLs (#{input.valid_entries.count} valid, #{input.invalid_entries.count} invalid)")
|
85
82
|
end
|
86
83
|
|
87
84
|
def log_result
|