download_files 0.0.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/download_files +21 -11
- data/download_files.gemspec +3 -4
- data/lib/download_files.rb +40 -9
- data/lib/download_files/version.rb +1 -1
- metadata +9 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 581b5cc0afb407f886aaebd62edb94c6d318b44c
|
4
|
+
data.tar.gz: 2677502f3b63318fd23855a130f55cad73619c9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7cfe3e0320976ff4b1d032ad8c64e0651f02905213a43784a95850c56dfec8b8792db3fe86bb680090d53daeaea99bc9b886de85767a4f0bced5f5a0f5120c78
|
7
|
+
data.tar.gz: 57e593f0d696f73c06b57f73cac499ee8091e7cc1e2cc9a8ce4ff900f023b0079c02ca92f9d76a45b7cd2fa123c67ab59db9ac943d9bf3f2de582744064974a0
|
data/bin/download_files
CHANGED
@@ -9,23 +9,33 @@ class App
|
|
9
9
|
include Methadone::Main
|
10
10
|
include Methadone::CLILogging
|
11
11
|
|
12
|
-
main do |page_address, pattern
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
12
|
+
main do |page_address, pattern|
|
13
|
+
FileUtils.mkdir_p(options['directory']) unless File.directory?(options['directory'])
|
14
|
+
DownloadFiles.download_files(
|
15
|
+
page_address,
|
16
|
+
pattern,
|
17
|
+
options['directory'],
|
18
|
+
options['ssl-verify'],
|
19
|
+
options['verbose'],
|
20
|
+
logger
|
21
|
+
)
|
19
22
|
end
|
20
23
|
|
21
|
-
# Declare command-line interface
|
22
24
|
description "Downloads files from the web page at the given address on the given domain that match the given regular expression pattern."
|
23
|
-
|
24
|
-
# Require an argument
|
25
|
+
|
25
26
|
arg :page_address
|
26
27
|
arg :pattern
|
27
28
|
arg :directory, :optional
|
28
|
-
|
29
|
+
|
30
|
+
options['directory'] = FileUtils.pwd
|
31
|
+
on('-d DIRECTORY', '--directory', 'Directory for scrapped content')
|
32
|
+
|
33
|
+
options['ssl-verify'] = true
|
34
|
+
on "--[no]-ssl-verify", 'Do no SSL verification of target website.'
|
35
|
+
|
36
|
+
options['verbose'] = false
|
37
|
+
on '-v', "--verbose", 'Verbose output'
|
38
|
+
|
29
39
|
version DownloadFiles::VERSION
|
30
40
|
|
31
41
|
use_log_level_option
|
data/download_files.gemspec
CHANGED
@@ -16,8 +16,7 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.version = DownloadFiles::VERSION
|
17
17
|
|
18
18
|
gem.add_development_dependency('rake', '~> 0.9.2')
|
19
|
-
|
20
|
-
gem.add_dependency('methadone', '~> 1.
|
21
|
-
gem.add_dependency('mechanize')
|
22
|
-
gem.add_dependency('nullobject')
|
19
|
+
|
20
|
+
gem.add_dependency('methadone', '~> 1.4.0')
|
21
|
+
gem.add_dependency('mechanize', '~> 2.7.3')
|
23
22
|
end
|
data/lib/download_files.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
require_relative "download_files/version"
|
2
2
|
|
3
3
|
require 'mechanize'
|
4
|
-
require 'nullobject'
|
5
4
|
|
6
5
|
class Mechanize
|
7
6
|
def self.start
|
8
7
|
yield(new)
|
9
8
|
end
|
10
9
|
|
11
|
-
def self.go(page_address)
|
10
|
+
def self.go(page_address, ssl_verify)
|
12
11
|
start do |agent|
|
12
|
+
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE unless ssl_verify
|
13
13
|
agent.get(page_address) do |page|
|
14
14
|
yield agent, page
|
15
15
|
end
|
@@ -18,24 +18,55 @@ class Mechanize
|
|
18
18
|
end
|
19
19
|
|
20
20
|
module DownloadFiles
|
21
|
-
def self.download_files(
|
22
|
-
|
21
|
+
def self.download_files(
|
22
|
+
page_address,
|
23
|
+
pattern,
|
24
|
+
dir,
|
25
|
+
ssl_verify,
|
26
|
+
is_verbose,
|
27
|
+
logger
|
28
|
+
)
|
29
|
+
logger.debug(
|
30
|
+
"DownloadFiles.download_files: " +
|
31
|
+
{
|
32
|
+
page_address: page_address,
|
33
|
+
pattern: pattern,
|
34
|
+
dir: dir,
|
35
|
+
ssl_verify: ssl_verify,
|
36
|
+
is_verbose: is_verbose,
|
37
|
+
logger: logger
|
38
|
+
}.inspect
|
39
|
+
)
|
23
40
|
|
24
|
-
abs_dir = File.
|
41
|
+
abs_dir = File.absolute_path(dir)
|
25
42
|
|
26
|
-
|
43
|
+
logger.info(
|
44
|
+
"Downloading files from \"#{page_address}\" to \"#{abs_dir}\" that match /#{pattern}/ " +
|
45
|
+
(if ssl_verify then 'requiring' else 'without requiring' end + ' SSL verification') +
|
46
|
+
'...'
|
47
|
+
)
|
48
|
+
|
49
|
+
Mechanize.go(page_address, ssl_verify) do |agent, page|
|
27
50
|
logger.debug page.inspect
|
28
|
-
|
51
|
+
|
52
|
+
if is_verbose
|
53
|
+
logger.info "On #{page.uri}"
|
54
|
+
end
|
29
55
|
|
30
56
|
agent.pluggable_parser.default = Mechanize::Download
|
31
57
|
|
32
58
|
page.links.each do |link|
|
33
|
-
|
59
|
+
if is_verbose
|
60
|
+
logger.info "Checking link #{link.href}"
|
61
|
+
end
|
34
62
|
|
35
63
|
next unless link.href && link.href.match(pattern)
|
36
64
|
|
37
65
|
full_file_name = File.expand_path(File.basename(link.href), abs_dir)
|
38
|
-
|
66
|
+
|
67
|
+
if is_verbose
|
68
|
+
logger.info "Downloading #{link.click.uri} to #{full_file_name}"
|
69
|
+
end
|
39
70
|
|
40
71
|
agent.get(link.click.uri).save(full_file_name)
|
41
72
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: download_files
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dustin Morrill
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-05-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,42 +30,28 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.
|
33
|
+
version: 1.4.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.
|
40
|
+
version: 1.4.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: mechanize
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - '>='
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: nullobject
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - '>='
|
45
|
+
- - ~>
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
47
|
+
version: 2.7.3
|
62
48
|
type: :runtime
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
|
-
- -
|
52
|
+
- - ~>
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
54
|
+
version: 2.7.3
|
69
55
|
description: Download all files at a particular URL that match a certain pattern.
|
70
56
|
email:
|
71
57
|
- dmorrill10@gmail.com
|
@@ -107,3 +93,4 @@ signing_key:
|
|
107
93
|
specification_version: 4
|
108
94
|
summary: Download all files at a particular URL that match a certain pattern.
|
109
95
|
test_files: []
|
96
|
+
has_rdoc:
|