download_files 0.0.2 → 1.0.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as it appears in its respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2cc5ee3b9444c2c755a9be44f0e4f3245a2ecbcb
4
- data.tar.gz: 13d1f4e3d59ecbb1cbf72318779fb6e8bb65a9a5
3
+ metadata.gz: 581b5cc0afb407f886aaebd62edb94c6d318b44c
4
+ data.tar.gz: 2677502f3b63318fd23855a130f55cad73619c9a
5
5
  SHA512:
6
- metadata.gz: 17f4bf978f62f18889d74bc034527fd262beee9fa5f8c5d15ac76eb4ae653e18353d46ccfc79b7175296834f16341d1b3546ebd41c248f08323b49e1ac8ec6e3
7
- data.tar.gz: 715bf6f10e6a4187d575764825a8cc90ae86317cc7d0672be253df285d0017a19d7da2549b853d3398a22671f7d1e013c16bc3ad028ce390d6428c73259fb844
6
+ metadata.gz: 7cfe3e0320976ff4b1d032ad8c64e0651f02905213a43784a95850c56dfec8b8792db3fe86bb680090d53daeaea99bc9b886de85767a4f0bced5f5a0f5120c78
7
+ data.tar.gz: 57e593f0d696f73c06b57f73cac499ee8091e7cc1e2cc9a8ce4ff900f023b0079c02ca92f9d76a45b7cd2fa123c67ab59db9ac943d9bf3f2de582744064974a0
@@ -9,23 +9,33 @@ class App
9
9
  include Methadone::Main
10
10
  include Methadone::CLILogging
11
11
 
12
- main do |page_address, pattern, directory|
13
- dir = if directory
14
- FileUtils.mkdir_p(directory) unless File.directory?(directory)
15
- else
16
- FileUtils.pwd
17
- end
18
- DownloadFiles.download_files(page_address, pattern, dir, logger)
12
+ main do |page_address, pattern|
13
+ FileUtils.mkdir_p(options['directory']) unless File.directory?(options['directory'])
14
+ DownloadFiles.download_files(
15
+ page_address,
16
+ pattern,
17
+ options['directory'],
18
+ options['ssl-verify'],
19
+ options['verbose'],
20
+ logger
21
+ )
19
22
  end
20
23
 
21
- # Declare command-line interface
22
24
  description "Downloads files from the web page at the given address on the given domain that match the given regular expression pattern."
23
-
24
- # Require an argument
25
+
25
26
  arg :page_address
26
27
  arg :pattern
27
28
  arg :directory, :optional
28
-
29
+
30
+ options['directory'] = FileUtils.pwd
31
+ on('-d DIRECTORY', '--directory', 'Directory for scrapped content')
32
+
33
+ options['ssl-verify'] = true
34
+ on "--[no]-ssl-verify", 'Do no SSL verification of target website.'
35
+
36
+ options['verbose'] = false
37
+ on '-v', "--verbose", 'Verbose output'
38
+
29
39
  version DownloadFiles::VERSION
30
40
 
31
41
  use_log_level_option
@@ -16,8 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.version = DownloadFiles::VERSION
17
17
 
18
18
  gem.add_development_dependency('rake', '~> 0.9.2')
19
-
20
- gem.add_dependency('methadone', '~> 1.2.5')
21
- gem.add_dependency('mechanize')
22
- gem.add_dependency('nullobject')
19
+
20
+ gem.add_dependency('methadone', '~> 1.4.0')
21
+ gem.add_dependency('mechanize', '~> 2.7.3')
23
22
  end
@@ -1,15 +1,15 @@
1
1
  require_relative "download_files/version"
2
2
 
3
3
  require 'mechanize'
4
- require 'nullobject'
5
4
 
6
5
  class Mechanize
7
6
  def self.start
8
7
  yield(new)
9
8
  end
10
9
 
11
- def self.go(page_address)
10
+ def self.go(page_address, ssl_verify)
12
11
  start do |agent|
12
+ agent.verify_mode = OpenSSL::SSL::VERIFY_NONE unless ssl_verify
13
13
  agent.get(page_address) do |page|
14
14
  yield agent, page
15
15
  end
@@ -18,24 +18,55 @@ class Mechanize
18
18
  end
19
19
 
20
20
  module DownloadFiles
21
- def self.download_files(page_address, pattern, dir=FileUtils.pwd, logger=Null::Object.instance)
22
- logger.debug "DownloadFiles.download_files: " + {page_address: page_address, pattern: pattern, dir: dir, logger: logger}.inspect
21
+ def self.download_files(
22
+ page_address,
23
+ pattern,
24
+ dir,
25
+ ssl_verify,
26
+ is_verbose,
27
+ logger
28
+ )
29
+ logger.debug(
30
+ "DownloadFiles.download_files: " +
31
+ {
32
+ page_address: page_address,
33
+ pattern: pattern,
34
+ dir: dir,
35
+ ssl_verify: ssl_verify,
36
+ is_verbose: is_verbose,
37
+ logger: logger
38
+ }.inspect
39
+ )
23
40
 
24
- abs_dir = File.expand_path(dir, FileUtils.pwd)
41
+ abs_dir = File.absolute_path(dir)
25
42
 
26
- Mechanize.go(page_address) do |agent, page|
43
+ logger.info(
44
+ "Downloading files from \"#{page_address}\" to \"#{abs_dir}\" that match /#{pattern}/ " +
45
+ (if ssl_verify then 'requiring' else 'without requiring' end + ' SSL verification') +
46
+ '...'
47
+ )
48
+
49
+ Mechanize.go(page_address, ssl_verify) do |agent, page|
27
50
  logger.debug page.inspect
28
- logger.info "On #{page.uri}"
51
+
52
+ if is_verbose
53
+ logger.info "On #{page.uri}"
54
+ end
29
55
 
30
56
  agent.pluggable_parser.default = Mechanize::Download
31
57
 
32
58
  page.links.each do |link|
33
- logger.debug "Checking link #{link.href}"
59
+ if is_verbose
60
+ logger.info "Checking link #{link.href}"
61
+ end
34
62
 
35
63
  next unless link.href && link.href.match(pattern)
36
64
 
37
65
  full_file_name = File.expand_path(File.basename(link.href), abs_dir)
38
- logger.info "Downloading #{link.click.uri} to #{full_file_name}"
66
+
67
+ if is_verbose
68
+ logger.info "Downloading #{link.click.uri} to #{full_file_name}"
69
+ end
39
70
 
40
71
  agent.get(link.click.uri).save(full_file_name)
41
72
  end
@@ -1,3 +1,3 @@
1
1
  module DownloadFiles
2
- VERSION = "0.0.2"
2
+ VERSION = "1.0.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: download_files
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dustin Morrill
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-23 00:00:00.000000000 Z
11
+ date: 2014-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,42 +30,28 @@ dependencies:
30
30
  requirements:
31
31
  - - ~>
32
32
  - !ruby/object:Gem::Version
33
- version: 1.2.5
33
+ version: 1.4.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ~>
39
39
  - !ruby/object:Gem::Version
40
- version: 1.2.5
40
+ version: 1.4.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: mechanize
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - '>='
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: nullobject
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - '>='
45
+ - - ~>
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: 2.7.3
62
48
  type: :runtime
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - '>='
52
+ - - ~>
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: 2.7.3
69
55
  description: Download all files at a particular URL that match a certain pattern.
70
56
  email:
71
57
  - dmorrill10@gmail.com
@@ -107,3 +93,4 @@ signing_key:
107
93
  specification_version: 4
108
94
  summary: Download all files at a particular URL that match a certain pattern.
109
95
  test_files: []
96
+ has_rdoc: