download_files 0.0.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2cc5ee3b9444c2c755a9be44f0e4f3245a2ecbcb
4
- data.tar.gz: 13d1f4e3d59ecbb1cbf72318779fb6e8bb65a9a5
3
+ metadata.gz: 581b5cc0afb407f886aaebd62edb94c6d318b44c
4
+ data.tar.gz: 2677502f3b63318fd23855a130f55cad73619c9a
5
5
  SHA512:
6
- metadata.gz: 17f4bf978f62f18889d74bc034527fd262beee9fa5f8c5d15ac76eb4ae653e18353d46ccfc79b7175296834f16341d1b3546ebd41c248f08323b49e1ac8ec6e3
7
- data.tar.gz: 715bf6f10e6a4187d575764825a8cc90ae86317cc7d0672be253df285d0017a19d7da2549b853d3398a22671f7d1e013c16bc3ad028ce390d6428c73259fb844
6
+ metadata.gz: 7cfe3e0320976ff4b1d032ad8c64e0651f02905213a43784a95850c56dfec8b8792db3fe86bb680090d53daeaea99bc9b886de85767a4f0bced5f5a0f5120c78
7
+ data.tar.gz: 57e593f0d696f73c06b57f73cac499ee8091e7cc1e2cc9a8ce4ff900f023b0079c02ca92f9d76a45b7cd2fa123c67ab59db9ac943d9bf3f2de582744064974a0
@@ -9,23 +9,33 @@ class App
9
9
  include Methadone::Main
10
10
  include Methadone::CLILogging
11
11
 
12
- main do |page_address, pattern, directory|
13
- dir = if directory
14
- FileUtils.mkdir_p(directory) unless File.directory?(directory)
15
- else
16
- FileUtils.pwd
17
- end
18
- DownloadFiles.download_files(page_address, pattern, dir, logger)
12
+ main do |page_address, pattern|
13
+ FileUtils.mkdir_p(options['directory']) unless File.directory?(options['directory'])
14
+ DownloadFiles.download_files(
15
+ page_address,
16
+ pattern,
17
+ options['directory'],
18
+ options['ssl-verify'],
19
+ options['verbose'],
20
+ logger
21
+ )
19
22
  end
20
23
 
21
- # Declare command-line interface
22
24
  description "Downloads files from the web page at the given address on the given domain that match the given regular expression pattern."
23
-
24
- # Require an argument
25
+
25
26
  arg :page_address
26
27
  arg :pattern
27
28
  arg :directory, :optional
28
-
29
+
30
+ options['directory'] = FileUtils.pwd
31
+ on('-d DIRECTORY', '--directory', 'Directory for scrapped content')
32
+
33
+ options['ssl-verify'] = true
34
+ on "--[no]-ssl-verify", 'Do no SSL verification of target website.'
35
+
36
+ options['verbose'] = false
37
+ on '-v', "--verbose", 'Verbose output'
38
+
29
39
  version DownloadFiles::VERSION
30
40
 
31
41
  use_log_level_option
@@ -16,8 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.version = DownloadFiles::VERSION
17
17
 
18
18
  gem.add_development_dependency('rake', '~> 0.9.2')
19
-
20
- gem.add_dependency('methadone', '~> 1.2.5')
21
- gem.add_dependency('mechanize')
22
- gem.add_dependency('nullobject')
19
+
20
+ gem.add_dependency('methadone', '~> 1.4.0')
21
+ gem.add_dependency('mechanize', '~> 2.7.3')
23
22
  end
@@ -1,15 +1,15 @@
1
1
  require_relative "download_files/version"
2
2
 
3
3
  require 'mechanize'
4
- require 'nullobject'
5
4
 
6
5
  class Mechanize
7
6
  def self.start
8
7
  yield(new)
9
8
  end
10
9
 
11
- def self.go(page_address)
10
+ def self.go(page_address, ssl_verify)
12
11
  start do |agent|
12
+ agent.verify_mode = OpenSSL::SSL::VERIFY_NONE unless ssl_verify
13
13
  agent.get(page_address) do |page|
14
14
  yield agent, page
15
15
  end
@@ -18,24 +18,55 @@ class Mechanize
18
18
  end
19
19
 
20
20
  module DownloadFiles
21
- def self.download_files(page_address, pattern, dir=FileUtils.pwd, logger=Null::Object.instance)
22
- logger.debug "DownloadFiles.download_files: " + {page_address: page_address, pattern: pattern, dir: dir, logger: logger}.inspect
21
+ def self.download_files(
22
+ page_address,
23
+ pattern,
24
+ dir,
25
+ ssl_verify,
26
+ is_verbose,
27
+ logger
28
+ )
29
+ logger.debug(
30
+ "DownloadFiles.download_files: " +
31
+ {
32
+ page_address: page_address,
33
+ pattern: pattern,
34
+ dir: dir,
35
+ ssl_verify: ssl_verify,
36
+ is_verbose: is_verbose,
37
+ logger: logger
38
+ }.inspect
39
+ )
23
40
 
24
- abs_dir = File.expand_path(dir, FileUtils.pwd)
41
+ abs_dir = File.absolute_path(dir)
25
42
 
26
- Mechanize.go(page_address) do |agent, page|
43
+ logger.info(
44
+ "Downloading files from \"#{page_address}\" to \"#{abs_dir}\" that match /#{pattern}/ " +
45
+ (if ssl_verify then 'requiring' else 'without requiring' end + ' SSL verification') +
46
+ '...'
47
+ )
48
+
49
+ Mechanize.go(page_address, ssl_verify) do |agent, page|
27
50
  logger.debug page.inspect
28
- logger.info "On #{page.uri}"
51
+
52
+ if is_verbose
53
+ logger.info "On #{page.uri}"
54
+ end
29
55
 
30
56
  agent.pluggable_parser.default = Mechanize::Download
31
57
 
32
58
  page.links.each do |link|
33
- logger.debug "Checking link #{link.href}"
59
+ if is_verbose
60
+ logger.info "Checking link #{link.href}"
61
+ end
34
62
 
35
63
  next unless link.href && link.href.match(pattern)
36
64
 
37
65
  full_file_name = File.expand_path(File.basename(link.href), abs_dir)
38
- logger.info "Downloading #{link.click.uri} to #{full_file_name}"
66
+
67
+ if is_verbose
68
+ logger.info "Downloading #{link.click.uri} to #{full_file_name}"
69
+ end
39
70
 
40
71
  agent.get(link.click.uri).save(full_file_name)
41
72
  end
@@ -1,3 +1,3 @@
1
1
  module DownloadFiles
2
- VERSION = "0.0.2"
2
+ VERSION = "1.0.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: download_files
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dustin Morrill
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-23 00:00:00.000000000 Z
11
+ date: 2014-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,42 +30,28 @@ dependencies:
30
30
  requirements:
31
31
  - - ~>
32
32
  - !ruby/object:Gem::Version
33
- version: 1.2.5
33
+ version: 1.4.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ~>
39
39
  - !ruby/object:Gem::Version
40
- version: 1.2.5
40
+ version: 1.4.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: mechanize
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - '>='
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: nullobject
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - '>='
45
+ - - ~>
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: 2.7.3
62
48
  type: :runtime
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - '>='
52
+ - - ~>
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: 2.7.3
69
55
  description: Download all files at a particular URL that match a certain pattern.
70
56
  email:
71
57
  - dmorrill10@gmail.com
@@ -107,3 +93,4 @@ signing_key:
107
93
  specification_version: 4
108
94
  summary: Download all files at a particular URL that match a certain pattern.
109
95
  test_files: []
96
+ has_rdoc: