gs_img_fetcher 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +166 -0
- data/.rspec +3 -0
- data/.rubocop.yml +15 -0
- data/.ruby-version +1 -0
- data/.travis.yml +12 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +103 -0
- data/LICENSE.txt +21 -0
- data/README.md +50 -0
- data/Rakefile +8 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/exe/gs_img_fetcher +48 -0
- data/gs_img_fetcher.gemspec +42 -0
- data/lib/gs_img_fetcher/fetcher.rb +76 -0
- data/lib/gs_img_fetcher/input.rb +26 -0
- data/lib/gs_img_fetcher/input_entry.rb +27 -0
- data/lib/gs_img_fetcher/manager.rb +91 -0
- data/lib/gs_img_fetcher/version.rb +5 -0
- data/lib/gs_img_fetcher.rb +14 -0
- metadata +221 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: fbf77d3d6f77afcb8fa99b171c7fa0ea5aa3bcb729c1fc34c281af653b48f0a6
|
|
4
|
+
data.tar.gz: 1ccc20a72beded5b5699a6a965fa5ce0fd2b0e44ac0bf84bb96237e6b752bda2
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: cbeef36cae41f32123c024070984905931920c2cdf0cba6fb29da0ce15f81a13f135861e49607a52cb7a57fd8c3f111531c12c5f78b8a895cb9f0298cb8e3a1e
|
|
7
|
+
data.tar.gz: 9632ebe62a05cc0c1a931381f7c139d0957d57f4a76b75dcdb992921f52ac23e24e1a093ec2b94b1745615a131949caa4c9aa7efdb6b6efd8e44fd8d7f2625ed
|
data/.gitignore
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Created by https://www.gitignore.io/api/git,ruby,rubymine
|
|
2
|
+
# Edit at https://www.gitignore.io/?templates=git,ruby,rubymine
|
|
3
|
+
|
|
4
|
+
### Git ###
|
|
5
|
+
# Created by git for backups. To disable backups in Git:
|
|
6
|
+
# $ git config --global mergetool.keepBackup false
|
|
7
|
+
*.orig
|
|
8
|
+
|
|
9
|
+
# Created by git when using merge tools for conflicts
|
|
10
|
+
*.BACKUP.*
|
|
11
|
+
*.BASE.*
|
|
12
|
+
*.LOCAL.*
|
|
13
|
+
*.REMOTE.*
|
|
14
|
+
*_BACKUP_*.txt
|
|
15
|
+
*_BASE_*.txt
|
|
16
|
+
*_LOCAL_*.txt
|
|
17
|
+
*_REMOTE_*.txt
|
|
18
|
+
|
|
19
|
+
### Ruby ###
|
|
20
|
+
.rspec_status
|
|
21
|
+
*.gem
|
|
22
|
+
*.rbc
|
|
23
|
+
/.config
|
|
24
|
+
/coverage/
|
|
25
|
+
/InstalledFiles
|
|
26
|
+
/pkg/
|
|
27
|
+
/spec/reports/
|
|
28
|
+
/spec/examples.txt
|
|
29
|
+
/test/tmp/
|
|
30
|
+
/test/version_tmp/
|
|
31
|
+
/tmp/
|
|
32
|
+
|
|
33
|
+
# Used by dotenv library to load environment variables.
|
|
34
|
+
# .env
|
|
35
|
+
|
|
36
|
+
# Ignore Byebug command history file.
|
|
37
|
+
.byebug_history
|
|
38
|
+
|
|
39
|
+
## Specific to RubyMotion:
|
|
40
|
+
.dat*
|
|
41
|
+
.repl_history
|
|
42
|
+
build/
|
|
43
|
+
*.bridgesupport
|
|
44
|
+
build-iPhoneOS/
|
|
45
|
+
build-iPhoneSimulator/
|
|
46
|
+
|
|
47
|
+
## Specific to RubyMotion (use of CocoaPods):
|
|
48
|
+
#
|
|
49
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
|
50
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
|
51
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
|
52
|
+
# vendor/Pods/
|
|
53
|
+
|
|
54
|
+
## Documentation cache and generated files:
|
|
55
|
+
/.yardoc/
|
|
56
|
+
/_yardoc/
|
|
57
|
+
/doc/
|
|
58
|
+
/rdoc/
|
|
59
|
+
|
|
60
|
+
## Environment normalization:
|
|
61
|
+
/.bundle/
|
|
62
|
+
/vendor/bundle
|
|
63
|
+
/lib/bundler/man/
|
|
64
|
+
|
|
65
|
+
# for a library or gem, you might want to ignore these files since the code is
|
|
66
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
67
|
+
# Gemfile.lock
|
|
68
|
+
# .ruby-version
|
|
69
|
+
# .ruby-gemset
|
|
70
|
+
|
|
71
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
|
72
|
+
.rvmrc
|
|
73
|
+
|
|
74
|
+
### Ruby Patch ###
|
|
75
|
+
# Used by RuboCop. Remote config files pulled in from inherit_from directive.
|
|
76
|
+
# .rubocop-https?--*
|
|
77
|
+
|
|
78
|
+
### RubyMine ###
|
|
79
|
+
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
|
|
80
|
+
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
|
81
|
+
|
|
82
|
+
# User-specific stuff
|
|
83
|
+
.idea/**/workspace.xml
|
|
84
|
+
.idea/**/tasks.xml
|
|
85
|
+
.idea/**/usage.statistics.xml
|
|
86
|
+
.idea/**/dictionaries
|
|
87
|
+
.idea/**/shelf
|
|
88
|
+
|
|
89
|
+
# Generated files
|
|
90
|
+
.idea/**/contentModel.xml
|
|
91
|
+
|
|
92
|
+
# Sensitive or high-churn files
|
|
93
|
+
.idea/**/dataSources/
|
|
94
|
+
.idea/**/dataSources.ids
|
|
95
|
+
.idea/**/dataSources.local.xml
|
|
96
|
+
.idea/**/sqlDataSources.xml
|
|
97
|
+
.idea/**/dynamic.xml
|
|
98
|
+
.idea/**/uiDesigner.xml
|
|
99
|
+
.idea/**/dbnavigator.xml
|
|
100
|
+
|
|
101
|
+
# Gradle
|
|
102
|
+
.idea/**/gradle.xml
|
|
103
|
+
.idea/**/libraries
|
|
104
|
+
|
|
105
|
+
# Gradle and Maven with auto-import
|
|
106
|
+
# When using Gradle or Maven with auto-import, you should exclude module files,
|
|
107
|
+
# since they will be recreated, and may cause churn. Uncomment if using
|
|
108
|
+
# auto-import.
|
|
109
|
+
# .idea/modules.xml
|
|
110
|
+
# .idea/*.iml
|
|
111
|
+
# .idea/modules
|
|
112
|
+
# *.iml
|
|
113
|
+
# *.ipr
|
|
114
|
+
|
|
115
|
+
# CMake
|
|
116
|
+
cmake-build-*/
|
|
117
|
+
|
|
118
|
+
# Mongo Explorer plugin
|
|
119
|
+
.idea/**/mongoSettings.xml
|
|
120
|
+
|
|
121
|
+
# File-based project format
|
|
122
|
+
*.iws
|
|
123
|
+
|
|
124
|
+
# IntelliJ
|
|
125
|
+
out/
|
|
126
|
+
|
|
127
|
+
# mpeltonen/sbt-idea plugin
|
|
128
|
+
.idea_modules/
|
|
129
|
+
|
|
130
|
+
# JIRA plugin
|
|
131
|
+
atlassian-ide-plugin.xml
|
|
132
|
+
|
|
133
|
+
# Cursive Clojure plugin
|
|
134
|
+
.idea/replstate.xml
|
|
135
|
+
|
|
136
|
+
# Crashlytics plugin (for Android Studio and IntelliJ)
|
|
137
|
+
com_crashlytics_export_strings.xml
|
|
138
|
+
crashlytics.properties
|
|
139
|
+
crashlytics-build.properties
|
|
140
|
+
fabric.properties
|
|
141
|
+
|
|
142
|
+
# Editor-based Rest Client
|
|
143
|
+
.idea/httpRequests
|
|
144
|
+
|
|
145
|
+
# Android studio 3.1+ serialized cache file
|
|
146
|
+
.idea/caches/build_file_checksums.ser
|
|
147
|
+
|
|
148
|
+
### RubyMine Patch ###
|
|
149
|
+
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
|
150
|
+
|
|
151
|
+
# *.iml
|
|
152
|
+
# modules.xml
|
|
153
|
+
# .idea/misc.xml
|
|
154
|
+
# *.ipr
|
|
155
|
+
|
|
156
|
+
# Sonarlint plugin
|
|
157
|
+
.idea/**/sonarlint/
|
|
158
|
+
|
|
159
|
+
# SonarQube Plugin
|
|
160
|
+
.idea/**/sonarIssues.xml
|
|
161
|
+
|
|
162
|
+
# Markdown Navigator plugin
|
|
163
|
+
.idea/**/markdown-navigator.xml
|
|
164
|
+
.idea/**/markdown-navigator/
|
|
165
|
+
|
|
166
|
+
# End of https://www.gitignore.io/api/git,ruby,rubymine
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/.ruby-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.6.5
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
gs_img_fetcher (0.1.0)
|
|
5
|
+
activesupport (~> 6.0)
|
|
6
|
+
down (~> 5.1)
|
|
7
|
+
dry-cli (~> 0.6)
|
|
8
|
+
|
|
9
|
+
GEM
|
|
10
|
+
remote: https://rubygems.org/
|
|
11
|
+
specs:
|
|
12
|
+
activesupport (6.0.3)
|
|
13
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
14
|
+
i18n (>= 0.7, < 2)
|
|
15
|
+
minitest (~> 5.1)
|
|
16
|
+
tzinfo (~> 1.1)
|
|
17
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
|
18
|
+
addressable (2.7.0)
|
|
19
|
+
public_suffix (>= 2.0.2, < 5.0)
|
|
20
|
+
ast (2.4.0)
|
|
21
|
+
byebug (11.1.3)
|
|
22
|
+
coderay (1.1.2)
|
|
23
|
+
concurrent-ruby (1.1.6)
|
|
24
|
+
crack (0.4.3)
|
|
25
|
+
safe_yaml (~> 1.0.0)
|
|
26
|
+
diff-lcs (1.3)
|
|
27
|
+
docile (1.3.2)
|
|
28
|
+
down (5.1.1)
|
|
29
|
+
addressable (~> 2.5)
|
|
30
|
+
dry-cli (0.6.0)
|
|
31
|
+
concurrent-ruby (~> 1.0)
|
|
32
|
+
hashdiff (1.0.1)
|
|
33
|
+
i18n (1.8.2)
|
|
34
|
+
concurrent-ruby (~> 1.0)
|
|
35
|
+
method_source (1.0.0)
|
|
36
|
+
minitest (5.14.1)
|
|
37
|
+
parallel (1.19.1)
|
|
38
|
+
parser (2.7.1.2)
|
|
39
|
+
ast (~> 2.4.0)
|
|
40
|
+
pry (0.13.1)
|
|
41
|
+
coderay (~> 1.1)
|
|
42
|
+
method_source (~> 1.0)
|
|
43
|
+
pry-byebug (3.9.0)
|
|
44
|
+
byebug (~> 11.0)
|
|
45
|
+
pry (~> 0.13.0)
|
|
46
|
+
public_suffix (4.0.5)
|
|
47
|
+
rainbow (3.0.0)
|
|
48
|
+
rake (12.3.3)
|
|
49
|
+
rexml (3.2.4)
|
|
50
|
+
rspec (3.9.0)
|
|
51
|
+
rspec-core (~> 3.9.0)
|
|
52
|
+
rspec-expectations (~> 3.9.0)
|
|
53
|
+
rspec-mocks (~> 3.9.0)
|
|
54
|
+
rspec-core (3.9.2)
|
|
55
|
+
rspec-support (~> 3.9.3)
|
|
56
|
+
rspec-expectations (3.9.2)
|
|
57
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
58
|
+
rspec-support (~> 3.9.0)
|
|
59
|
+
rspec-mocks (3.9.1)
|
|
60
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
61
|
+
rspec-support (~> 3.9.0)
|
|
62
|
+
rspec-support (3.9.3)
|
|
63
|
+
rubocop (0.83.0)
|
|
64
|
+
parallel (~> 1.10)
|
|
65
|
+
parser (>= 2.7.0.1)
|
|
66
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
67
|
+
rexml
|
|
68
|
+
ruby-progressbar (~> 1.7)
|
|
69
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
|
70
|
+
rubocop-rspec (1.39.0)
|
|
71
|
+
rubocop (>= 0.68.1)
|
|
72
|
+
ruby-progressbar (1.10.1)
|
|
73
|
+
safe_yaml (1.0.5)
|
|
74
|
+
simplecov (0.18.5)
|
|
75
|
+
docile (~> 1.1)
|
|
76
|
+
simplecov-html (~> 0.11)
|
|
77
|
+
simplecov-html (0.12.2)
|
|
78
|
+
thread_safe (0.3.6)
|
|
79
|
+
tzinfo (1.2.7)
|
|
80
|
+
thread_safe (~> 0.1)
|
|
81
|
+
unicode-display_width (1.7.0)
|
|
82
|
+
webmock (3.8.3)
|
|
83
|
+
addressable (>= 2.3.6)
|
|
84
|
+
crack (>= 0.3.2)
|
|
85
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
|
86
|
+
zeitwerk (2.3.0)
|
|
87
|
+
|
|
88
|
+
PLATFORMS
|
|
89
|
+
ruby
|
|
90
|
+
|
|
91
|
+
DEPENDENCIES
|
|
92
|
+
concurrent-ruby (~> 1.1)
|
|
93
|
+
gs_img_fetcher!
|
|
94
|
+
pry-byebug
|
|
95
|
+
rake (~> 12.0)
|
|
96
|
+
rspec (~> 3.0)
|
|
97
|
+
rubocop (~> 0.83)
|
|
98
|
+
rubocop-rspec (~> 1.39)
|
|
99
|
+
simplecov
|
|
100
|
+
webmock
|
|
101
|
+
|
|
102
|
+
BUNDLED WITH
|
|
103
|
+
2.1.4
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2020 Akihiko Ito
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# GsImgFetcher [Build status](https://travis-ci.org/AkihikoITOH/gs_img_fetcher)
|
|
2
|
+
|
|
3
|
+
`gs_img_fetcher` is a tool to download images from remote hosts and save them on your local storage.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Add this line to your application's Gemfile:
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
gem 'gs_img_fetcher'
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
And then execute:
|
|
14
|
+
|
|
15
|
+
$ bundle install
|
|
16
|
+
|
|
17
|
+
Or install it yourself as:
|
|
18
|
+
|
|
19
|
+
$ gem install gs_img_fetcher
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
Let's say you have, in your current directory, a text file named `urls.txt` containing a list of image URLs, one URL per line.
|
|
24
|
+
|
|
25
|
+
Running `gs_img_fetcher run urls.txt output` takes the URLs from `urls.txt`, downloads the images and saves them in the directory `output`.
|
|
26
|
+
|
|
27
|
+
Run `gs_img_fetcher --help` to show usage guide.
|
|
28
|
+
|
|
29
|
+
Set the environment variable `NOLOG` (to any value) to suppress logs.
|
|
30
|
+
|
|
31
|
+
## Components
|
|
32
|
+
|
|
33
|
+
`gs_img_fetcher` is designed with concurrency in mind. It can be configured to fetch images either asynchronously or synchronously.
|
|
34
|
+
By default, it runs asynchronously, and the maximum number of threads defaults to the number of processors detected on your machine.
|
|
35
|
+
For a relatively small input, it would be better to specify the `--no-async` option.
|
|
36
|
+
Check out the options `async` and `max_threads`.
|
|
37
|
+
|
|
38
|
+
- `Manager` is what controls the entire process of handling the input and fetching and saving the images.
|
|
39
|
+
- `Input` is responsible for finding the input file and parsing, sanitizing and validating the list of URLs.
|
|
40
|
+
- `Fetcher` is responsible for downloading and saving images.
|
|
41
|
+
|
|
42
|
+
## Development
|
|
43
|
+
|
|
44
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rspec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
45
|
+
|
|
46
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
47
|
+
|
|
48
|
+
## License
|
|
49
|
+
|
|
50
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'bundler/setup'
|
|
5
|
+
require 'gs_img_fetcher'
|
|
6
|
+
|
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
|
9
|
+
|
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
11
|
+
# require "pry"
|
|
12
|
+
# Pry.start
|
|
13
|
+
|
|
14
|
+
require 'irb'
|
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/gs_img_fetcher
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'dry/cli'
|
|
5
|
+
require 'gs_img_fetcher'
|
|
6
|
+
|
|
7
|
+
# Registry of the subcommands exposed by the `gs_img_fetcher` executable.
module Commands
  extend Dry::CLI::Registry

  # `version` subcommand: writes the gem version to standard output.
  class Version < Dry::CLI::Command
    desc 'Print version'

    def call(_args)
      puts GsImgFetcher::VERSION
    end
  end

  # `run` subcommand: hands the input file and the recognised options over
  # to GsImgFetcher::Manager.
  class Run < Dry::CLI::Command
    desc 'Fetch and save images'

    argument :input_path, required: true, type: :string,
                          desc: 'Absolute or relative path to the plain text file listing URLs.'
    argument :output_dir, required: false, type: :string,
                          desc: 'Absolute or relative path to the directory to save images in.'

    option :async, required: false, default: true, type: :boolean,
                   desc: 'If set to true, images will be fetched asynchronously.'
    option :max_threads, required: false, type: :integer,
                         desc: 'Maximum number of threads.'

    # Only the keys the manager understands are forwarded; anything else the
    # parser put into +opts+ is dropped.
    def call(input_path:, **opts)
      manager_options = opts.slice(:output_dir, :async, :max_threads)
      GsImgFetcher::Manager.fetch(input_path, **manager_options)
    end
  end

  register 'version', Version
  register 'run', Run
end
|
|
47
|
+
|
|
48
|
+
Dry::CLI.new(Commands).call
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/gs_img_fetcher/version'
|
|
4
|
+
|
|
5
|
+
# Gem specification for gs_img_fetcher: identity, packaged files, the
# executable, and runtime/development dependencies.
Gem::Specification.new do |spec|
  spec.name          = 'gs_img_fetcher'
  spec.version       = GsImgFetcher::VERSION
  spec.authors       = ['Akihiko Ito']
  spec.email         = ['abc@akihiko.eu']

  spec.summary       = 'A CLI tool that fetches and saves images from URLs given via a plain text file.'
  spec.description   = ''
  spec.homepage      = 'https://github.com/AkihikoITOH/gs_img_fetcher'
  spec.license       = 'MIT'
  spec.required_ruby_version = Gem::Requirement.new('>= 2.5.0')

  spec.metadata['allowed_push_host'] = 'https://rubygems.org/'

  spec.metadata['homepage_uri'] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'activesupport', '~> 6.0'
  # lib/gs_img_fetcher/manager.rb does `require 'concurrent'` when the library
  # is loaded, so concurrent-ruby has to be a runtime dependency rather than a
  # development-only one.
  spec.add_runtime_dependency 'concurrent-ruby', '~> 1.1'
  spec.add_runtime_dependency 'down', '~> 5.1'
  spec.add_runtime_dependency 'dry-cli', '~> 0.6'

  spec.add_development_dependency 'rake', '~> 12.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'rubocop', '~> 0.83'
  spec.add_development_dependency 'rubocop-rspec', '~> 1.39'
  spec.add_development_dependency 'pry-byebug'
  spec.add_development_dependency 'simplecov'
  spec.add_development_dependency 'webmock'
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require 'down'
|
|
5
|
+
|
|
6
|
+
# FileUtils is used by Fetcher#save; require it here instead of relying on
# another library having loaded it first.
require 'fileutils'

module GsImgFetcher
  # Downloads the image behind a single input entry and moves it into the
  # output directory, tracking progress in #state.
  #
  # The entry must respond to +url+ and +extension+.
  class Fetcher
    # Values taken by #state over the lifetime of a fetcher.
    INITIALIZED = :initialized
    FETCHED = :fetched
    FETCH_FAILED = :fetch_failed
    SAVED = :saved
    SAVE_FAILED = :save_failed

    attr_reader :state, :uuid

    # @param entry      object responding to +url+ and +extension+
    # @param output_dir directory the downloaded file is moved into
    def initialize(entry, output_dir)
      @entry = entry
      @output_dir = output_dir
      # Random identifier used as the base name of the saved file.
      @uuid = SecureRandom.uuid
      @state = INITIALIZED
    end

    # Downloads the entry's URL with Down.download and keeps the result for
    # #save. A Down::Error is logged and recorded as FETCH_FAILED instead of
    # being raised; any other exception propagates.
    #
    # The return value is whatever the logger call returns.
    def fetch
      @tempfile = Down.download(@entry.url)
      @state = FETCHED
      log_fetched
    rescue Down::Error => e
      @state = FETCH_FAILED
      log_error(e)
    end

    # Moves the downloaded file to #output_path, creating the output
    # directory when needed. Does nothing (returns nil) unless the state is
    # FETCHED. Any StandardError is logged and recorded as SAVE_FAILED.
    def save
      return unless fetched?

      FileUtils.mkdir_p(@output_dir)
      FileUtils.mv(tempfile.path, output_path)
      @state = SAVED
      log_saved
    rescue StandardError => e
      @state = SAVE_FAILED
      log_error(e)
    end

    # Destination path: "<output_dir>/<uuid>.<entry extension>" (memoized).
    def output_path
      @output_path ||= File.join(@output_dir, [uuid, @entry.extension].join('.'))
    end

    # True when either the download or the save step failed.
    def failed?
      [FETCH_FAILED, SAVE_FAILED].include?(state)
    end

    # True only between a successful #fetch and the following #save.
    def fetched?
      FETCHED == state
    end

    # True once the file has been moved into place.
    def successful?
      SAVED == state
    end

    private

    attr_reader :tempfile

    def log_fetched
      LOGGER.info("Successfully fetched #{@entry.url}")
    end

    def log_saved
      LOGGER.info("Saved #{@entry.url} to #{output_path}")
    end

    def log_error(err)
      LOGGER.warn(err.message)
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/object/blank'
|
|
4
|
+
|
|
5
|
+
module GsImgFetcher
  # Reads the URL list file and exposes its lines as InputEntry objects.
  class Input
    # @param path path of the text file holding one URL per line
    def initialize(path)
      @path = path
    end

    # Every distinct non-blank line of the file (whitespace-stripped), wrapped
    # in an InputEntry. The file is read once and the result memoized.
    def entries
      @entries ||= begin
        stripped = File.open(@path, 'r') { |io| io.each_line.map(&:strip) }
        urls = stripped.map(&:presence).compact.uniq
        urls.map { |url| InputEntry.new(url) }
      end
    end

    # Entries whose #valid? is true (memoized).
    def valid_entries
      @valid_entries ||= entries.select { |entry| entry.valid? }
    end

    # Entries whose #valid? is false (memoized).
    def invalid_entries
      @invalid_entries ||= entries.reject { |entry| entry.valid? }
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# URI.parse is used below; load the standard library explicitly rather than
# relying on another file having pulled it in.
require 'uri'

module GsImgFetcher
  # One line of the input file, interpreted as an image URL.
  class InputEntry
    # File extensions (lower case) accepted as images.
    ALLOWED_EXTENSIONS = %w[jpeg jpg png gif svg].freeze

    attr_reader :url

    # @param url [String] raw URL taken from the input file
    def initialize(url)
      @url = url
    end

    # Lower-cased text after the last dot of the file name when it is one of
    # ALLOWED_EXTENSIONS, otherwise nil.
    def extension
      @extension ||= ([filename.split('.').last&.downcase] & ALLOWED_EXTENSIONS).first
    end

    # True when the URL yields a non-empty file name with an allowed
    # extension. Malformed URLs are reported as invalid instead of raising.
    def valid?
      !filename.empty? && !extension.nil?
    end

    private

    # Last segment of the URL path, or '' when there is none.
    #
    # A line that cannot be parsed as a URI (URI::InvalidURIError) and a URI
    # without a path component (+path+ is nil) both yield '', so that a single
    # bad line is classified as invalid rather than aborting the whole run.
    def filename
      @filename ||= begin
        URI.parse(url).path.to_s.split('/').last || ''
      rescue URI::InvalidURIError
        ''
      end
    end
  end
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'concurrent'
|
|
4
|
+
|
|
5
|
+
module GsImgFetcher
  # Drives one run: reads the URL list, fetches and saves every valid entry
  # (on a thread pool or inline) and logs a summary.
  class Manager
    class << self
      # Convenience entry point: builds a manager, queues the input and runs
      # the fetch. Takes the same arguments as #initialize.
      def fetch(*args, **opts)
        new(*args, **opts).setup.fetch
      end
    end

    attr_reader :entries, :successful_fetches, :failed_fetches

    # Pool size used when no max_threads is given.
    DEFAULT_MAX_THREADS = Concurrent.processor_count

    # @param input_path  path of the text file listing URLs
    # @param output_dir  directory to save into; defaults to the current
    #                    working directory
    # @param async       fetch on a thread pool when truthy, inline otherwise
    # @param max_threads upper bound for the pool size; an Integer or a
    #                    numeric String
    # @raise [ArgumentError] when max_threads is not a valid integer
    def initialize(input_path, output_dir: nil, async: true, max_threads: nil)
      @input_path = input_path
      @output_dir = output_dir || Dir.pwd
      @async = async
      # Integer() leaves integers untouched and converts numeric strings.
      # NOTE(review): a CLI front end may hand option values over as strings,
      # which would break the numeric comparison in #with_thread_pool — confirm.
      @max_threads = Integer(max_threads || DEFAULT_MAX_THREADS)
      @entries = Queue.new
      @successful_fetches = Queue.new
      @failed_fetches = Queue.new
    end

    # Lazily built reader for the input file.
    def input
      @input ||= Input.new(@input_path)
    end

    # Logs the input statistics and, if nothing is queued yet, queues every
    # valid entry. Returns self so calls can be chained.
    def setup
      tap do
        log_entries

        next unless entries.empty?

        input.valid_entries.each { |e| entries.push(e) }
      end
    end

    # Processes all queued entries, logs the outcome and closes the result
    # queues.
    def fetch
      @async ? async_fetch : sync_fetch

      log_result
      successful_fetches.close
      failed_fetches.close
    end

    private

    # Posts one task per queued entry.
    #
    # The entry is popped on the calling thread, before the task is posted:
    # popping inside the task would let this loop keep posting while workers
    # have not yet drained the queue, producing more tasks than entries.
    def async_fetch
      with_thread_pool do |pool|
        until entries.empty?
          entry = entries.pop
          pool.post { fetch_one(entry, @output_dir) }
        end
      end
      entries.close
    end

    def sync_fetch
      fetch_one(entries.pop, @output_dir) until entries.empty?
      entries.close
    end

    # Yields a fixed-size pool, then shuts it down and waits for the posted
    # tasks to finish.
    #
    # The size is the smaller of the queue length and max_threads, but never
    # below 1, so an empty queue (or a non-positive max_threads) does not make
    # pool construction fail.
    def with_thread_pool
      pool_size = [[entries.size, @max_threads].min, 1].max
      Concurrent::FixedThreadPool.new(pool_size).tap do |pool|
        yield(pool)
        pool.shutdown
        pool.wait_for_termination
      end
    end

    # Fetches and saves one entry, then files the fetcher under the
    # successful or failed queue according to its final state.
    def fetch_one(entry, dir)
      Fetcher.new(entry, dir).tap do |fetcher|
        fetcher.fetch && fetcher.save

        if fetcher.successful?
          successful_fetches.push(fetcher)
        else
          failed_fetches.push(fetcher)
        end
      end
    end

    def log_entries
      LOGGER.info("Processing #{input.entries.count} URLs (#{input.valid_entries.count} valid, #{input.invalid_entries.count} invalid) from #{@input_path}")
    end

    def log_result
      LOGGER.info("Fetch complete (#{successful_fetches.size} successful, #{failed_fetches.size} failed)")
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'logger'
|
|
4
|
+
require 'gs_img_fetcher/version'
|
|
5
|
+
require 'gs_img_fetcher/fetcher'
|
|
6
|
+
require 'gs_img_fetcher/input_entry'
|
|
7
|
+
require 'gs_img_fetcher/input'
|
|
8
|
+
require 'gs_img_fetcher/manager'
|
|
9
|
+
|
|
10
|
+
module GsImgFetcher
  # Shared logger, INFO level. Writes to STDOUT unless the NOLOG environment
  # variable is set (to any value), in which case output goes to the
  # platform's null device. File::NULL is used instead of a hard-coded
  # '/dev/null' so this also works where that path does not exist.
  LOGGER = Logger.new(ENV['NOLOG'] ? File::NULL : STDOUT).tap { |l| l.level = Logger::INFO }

  # Gem-specific error class.
  class Error < StandardError; end
end
|
metadata
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: gs_img_fetcher
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Akihiko Ito
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2020-05-17 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: activesupport
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '6.0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '6.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: concurrent-ruby
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.1'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.1'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: down
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '5.1'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '5.1'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: dry-cli
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0.6'
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0.6'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: rake
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '12.0'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '12.0'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: rspec
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - "~>"
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '3.0'
|
|
90
|
+
type: :development
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - "~>"
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '3.0'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: rubocop
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - "~>"
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '0.83'
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - "~>"
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '0.83'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: rubocop-rspec
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - "~>"
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '1.39'
|
|
118
|
+
type: :development
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - "~>"
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: '1.39'
|
|
125
|
+
- !ruby/object:Gem::Dependency
|
|
126
|
+
name: pry-byebug
|
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - ">="
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: '0'
|
|
132
|
+
type: :development
|
|
133
|
+
prerelease: false
|
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
135
|
+
requirements:
|
|
136
|
+
- - ">="
|
|
137
|
+
- !ruby/object:Gem::Version
|
|
138
|
+
version: '0'
|
|
139
|
+
- !ruby/object:Gem::Dependency
|
|
140
|
+
name: simplecov
|
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
|
142
|
+
requirements:
|
|
143
|
+
- - ">="
|
|
144
|
+
- !ruby/object:Gem::Version
|
|
145
|
+
version: '0'
|
|
146
|
+
type: :development
|
|
147
|
+
prerelease: false
|
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
149
|
+
requirements:
|
|
150
|
+
- - ">="
|
|
151
|
+
- !ruby/object:Gem::Version
|
|
152
|
+
version: '0'
|
|
153
|
+
- !ruby/object:Gem::Dependency
|
|
154
|
+
name: webmock
|
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
|
156
|
+
requirements:
|
|
157
|
+
- - ">="
|
|
158
|
+
- !ruby/object:Gem::Version
|
|
159
|
+
version: '0'
|
|
160
|
+
type: :development
|
|
161
|
+
prerelease: false
|
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
163
|
+
requirements:
|
|
164
|
+
- - ">="
|
|
165
|
+
- !ruby/object:Gem::Version
|
|
166
|
+
version: '0'
|
|
167
|
+
description: ''
|
|
168
|
+
email:
|
|
169
|
+
- abc@akihiko.eu
|
|
170
|
+
executables:
|
|
171
|
+
- gs_img_fetcher
|
|
172
|
+
extensions: []
|
|
173
|
+
extra_rdoc_files: []
|
|
174
|
+
files:
|
|
175
|
+
- ".gitignore"
|
|
176
|
+
- ".rspec"
|
|
177
|
+
- ".rubocop.yml"
|
|
178
|
+
- ".ruby-version"
|
|
179
|
+
- ".travis.yml"
|
|
180
|
+
- Gemfile
|
|
181
|
+
- Gemfile.lock
|
|
182
|
+
- LICENSE.txt
|
|
183
|
+
- README.md
|
|
184
|
+
- Rakefile
|
|
185
|
+
- bin/console
|
|
186
|
+
- bin/setup
|
|
187
|
+
- exe/gs_img_fetcher
|
|
188
|
+
- gs_img_fetcher.gemspec
|
|
189
|
+
- lib/gs_img_fetcher.rb
|
|
190
|
+
- lib/gs_img_fetcher/fetcher.rb
|
|
191
|
+
- lib/gs_img_fetcher/input.rb
|
|
192
|
+
- lib/gs_img_fetcher/input_entry.rb
|
|
193
|
+
- lib/gs_img_fetcher/manager.rb
|
|
194
|
+
- lib/gs_img_fetcher/version.rb
|
|
195
|
+
homepage: https://github.com/AkihikoITOH/gs_img_fetcher
|
|
196
|
+
licenses:
|
|
197
|
+
- MIT
|
|
198
|
+
metadata:
|
|
199
|
+
allowed_push_host: https://rubygems.org/
|
|
200
|
+
homepage_uri: https://github.com/AkihikoITOH/gs_img_fetcher
|
|
201
|
+
post_install_message:
|
|
202
|
+
rdoc_options: []
|
|
203
|
+
require_paths:
|
|
204
|
+
- lib
|
|
205
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
206
|
+
requirements:
|
|
207
|
+
- - ">="
|
|
208
|
+
- !ruby/object:Gem::Version
|
|
209
|
+
version: 2.5.0
|
|
210
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
211
|
+
requirements:
|
|
212
|
+
- - ">="
|
|
213
|
+
- !ruby/object:Gem::Version
|
|
214
|
+
version: '0'
|
|
215
|
+
requirements: []
|
|
216
|
+
rubygems_version: 3.0.3
|
|
217
|
+
signing_key:
|
|
218
|
+
specification_version: 4
|
|
219
|
+
summary: A CLI tool that fetches and saves images from URLs given via a plain text
|
|
220
|
+
file.
|
|
221
|
+
test_files: []
|