download_files 0.0.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/download_files +21 -11
- data/download_files.gemspec +3 -4
- data/lib/download_files.rb +40 -9
- data/lib/download_files/version.rb +1 -1
- metadata +9 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 581b5cc0afb407f886aaebd62edb94c6d318b44c
|
4
|
+
data.tar.gz: 2677502f3b63318fd23855a130f55cad73619c9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7cfe3e0320976ff4b1d032ad8c64e0651f02905213a43784a95850c56dfec8b8792db3fe86bb680090d53daeaea99bc9b886de85767a4f0bced5f5a0f5120c78
|
7
|
+
data.tar.gz: 57e593f0d696f73c06b57f73cac499ee8091e7cc1e2cc9a8ce4ff900f023b0079c02ca92f9d76a45b7cd2fa123c67ab59db9ac943d9bf3f2de582744064974a0
|
data/bin/download_files
CHANGED
@@ -9,23 +9,33 @@ class App
|
|
9
9
|
include Methadone::Main
|
10
10
|
include Methadone::CLILogging
|
11
11
|
|
12
|
-
main do |page_address, pattern
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
12
|
+
main do |page_address, pattern|
|
13
|
+
FileUtils.mkdir_p(options['directory']) unless File.directory?(options['directory'])
|
14
|
+
DownloadFiles.download_files(
|
15
|
+
page_address,
|
16
|
+
pattern,
|
17
|
+
options['directory'],
|
18
|
+
options['ssl-verify'],
|
19
|
+
options['verbose'],
|
20
|
+
logger
|
21
|
+
)
|
19
22
|
end
|
20
23
|
|
21
|
-
# Declare command-line interface
|
22
24
|
description "Downloads files from the web page at the given address on the given domain that match the given regular expression pattern."
|
23
|
-
|
24
|
-
# Require an argument
|
25
|
+
|
25
26
|
arg :page_address
|
26
27
|
arg :pattern
|
27
28
|
arg :directory, :optional
|
28
|
-
|
29
|
+
|
30
|
+
options['directory'] = FileUtils.pwd
|
31
|
+
on('-d DIRECTORY', '--directory', 'Directory for scrapped content')
|
32
|
+
|
33
|
+
options['ssl-verify'] = true
|
34
|
+
on "--[no]-ssl-verify", 'Do no SSL verification of target website.'
|
35
|
+
|
36
|
+
options['verbose'] = false
|
37
|
+
on '-v', "--verbose", 'Verbose output'
|
38
|
+
|
29
39
|
version DownloadFiles::VERSION
|
30
40
|
|
31
41
|
use_log_level_option
|
data/download_files.gemspec
CHANGED
@@ -16,8 +16,7 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.version = DownloadFiles::VERSION
|
17
17
|
|
18
18
|
gem.add_development_dependency('rake', '~> 0.9.2')
|
19
|
-
|
20
|
-
gem.add_dependency('methadone', '~> 1.
|
21
|
-
gem.add_dependency('mechanize')
|
22
|
-
gem.add_dependency('nullobject')
|
19
|
+
|
20
|
+
gem.add_dependency('methadone', '~> 1.4.0')
|
21
|
+
gem.add_dependency('mechanize', '~> 2.7.3')
|
23
22
|
end
|
data/lib/download_files.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
require_relative "download_files/version"
|
2
2
|
|
3
3
|
require 'mechanize'
|
4
|
-
require 'nullobject'
|
5
4
|
|
6
5
|
class Mechanize
|
7
6
|
def self.start
|
8
7
|
yield(new)
|
9
8
|
end
|
10
9
|
|
11
|
-
def self.go(page_address)
|
10
|
+
def self.go(page_address, ssl_verify)
|
12
11
|
start do |agent|
|
12
|
+
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE unless ssl_verify
|
13
13
|
agent.get(page_address) do |page|
|
14
14
|
yield agent, page
|
15
15
|
end
|
@@ -18,24 +18,55 @@ class Mechanize
|
|
18
18
|
end
|
19
19
|
|
20
20
|
module DownloadFiles
|
21
|
-
def self.download_files(
|
22
|
-
|
21
|
+
def self.download_files(
|
22
|
+
page_address,
|
23
|
+
pattern,
|
24
|
+
dir,
|
25
|
+
ssl_verify,
|
26
|
+
is_verbose,
|
27
|
+
logger
|
28
|
+
)
|
29
|
+
logger.debug(
|
30
|
+
"DownloadFiles.download_files: " +
|
31
|
+
{
|
32
|
+
page_address: page_address,
|
33
|
+
pattern: pattern,
|
34
|
+
dir: dir,
|
35
|
+
ssl_verify: ssl_verify,
|
36
|
+
is_verbose: is_verbose,
|
37
|
+
logger: logger
|
38
|
+
}.inspect
|
39
|
+
)
|
23
40
|
|
24
|
-
abs_dir = File.
|
41
|
+
abs_dir = File.absolute_path(dir)
|
25
42
|
|
26
|
-
|
43
|
+
logger.info(
|
44
|
+
"Downloading files from \"#{page_address}\" to \"#{abs_dir}\" that match /#{pattern}/ " +
|
45
|
+
(if ssl_verify then 'requiring' else 'without requiring' end + ' SSL verification') +
|
46
|
+
'...'
|
47
|
+
)
|
48
|
+
|
49
|
+
Mechanize.go(page_address, ssl_verify) do |agent, page|
|
27
50
|
logger.debug page.inspect
|
28
|
-
|
51
|
+
|
52
|
+
if is_verbose
|
53
|
+
logger.info "On #{page.uri}"
|
54
|
+
end
|
29
55
|
|
30
56
|
agent.pluggable_parser.default = Mechanize::Download
|
31
57
|
|
32
58
|
page.links.each do |link|
|
33
|
-
|
59
|
+
if is_verbose
|
60
|
+
logger.info "Checking link #{link.href}"
|
61
|
+
end
|
34
62
|
|
35
63
|
next unless link.href && link.href.match(pattern)
|
36
64
|
|
37
65
|
full_file_name = File.expand_path(File.basename(link.href), abs_dir)
|
38
|
-
|
66
|
+
|
67
|
+
if is_verbose
|
68
|
+
logger.info "Downloading #{link.click.uri} to #{full_file_name}"
|
69
|
+
end
|
39
70
|
|
40
71
|
agent.get(link.click.uri).save(full_file_name)
|
41
72
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: download_files
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dustin Morrill
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-05-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,42 +30,28 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.
|
33
|
+
version: 1.4.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.
|
40
|
+
version: 1.4.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: mechanize
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - '>='
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: nullobject
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - '>='
|
45
|
+
- - ~>
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
47
|
+
version: 2.7.3
|
62
48
|
type: :runtime
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
|
-
- -
|
52
|
+
- - ~>
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
54
|
+
version: 2.7.3
|
69
55
|
description: Download all files at a particular URL that match a certain pattern.
|
70
56
|
email:
|
71
57
|
- dmorrill10@gmail.com
|
@@ -107,3 +93,4 @@ signing_key:
|
|
107
93
|
specification_version: 4
|
108
94
|
summary: Download all files at a particular URL that match a certain pattern.
|
109
95
|
test_files: []
|
96
|
+
has_rdoc:
|