pmirror 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/README.md +11 -2
- data/features/pmirror.feature +19 -8
- data/lib/pmirror.rb +51 -30
- data/lib/pmirror/version.rb +1 -1
- metadata +2 -2
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -17,6 +17,9 @@ find it.
|
|
17
17
|
The tool is very new so it only has the bare minimum feature set:
|
18
18
|
|
19
19
|
- Specify multiple regex patterns to match against the remote directory
|
20
|
+
- Specify a list of URLs to look for patterns across; this means you
|
21
|
+
can look for the same patterns on multiple urls and aggregate those
|
22
|
+
files into one place
|
20
23
|
- Provides a progressbar download status indicator
|
21
24
|
- Specify a local directory to download files to
|
22
25
|
- Specify a command to execute on the local directory once all files are
|
@@ -53,6 +56,8 @@ Options:
|
|
53
56
|
-h, --help Show command line help
|
54
57
|
-p, --pattern PAT1,PAT2,PAT3 Regex to match files in remote dir,
|
55
58
|
may specify multiple patterns
|
59
|
+
-u, --url One or more URLs to check for
|
60
|
+
files that match the defined patterns
|
56
61
|
-l, --localdir DIR Local directory to mirror files to
|
57
62
|
-e, --exec CMD Execute command after completion
|
58
63
|
-d, --debug Enable debugging
|
@@ -61,9 +66,13 @@ may specify multiple patterns
|
|
61
66
|
|
62
67
|
Usage should be pretty self explanatory but here are the details:
|
63
68
|
|
64
|
-
|
69
|
+
`--url` is the remote URL that you want to fetch files from. Right now
|
65
70
|
this is assumed to be an un-authenticated url. We do not recurse into
|
66
|
-
directories looking for files.
|
71
|
+
directories looking for files. You may specify more than one url and we
|
72
|
+
will look at each url and download any matching files from that url. If
|
73
|
+
the same filename is matched across multiple URLs, only the first will
|
74
|
+
be downloaded; subsequent files will see that there is already a local
|
75
|
+
file with the same name and will not download.
|
67
76
|
|
68
77
|
`--pattern` allows you to specify a comma separated list of patterns to
|
69
78
|
match on the remote directory. We will iterate over each pattern and
|
data/features/pmirror.feature
CHANGED
@@ -14,25 +14,28 @@ Feature: My bootstrapped app kinda works
|
|
14
14
|
|--localdir|
|
15
15
|
|--exec|
|
16
16
|
|--version|
|
17
|
-
|
18
|
-
|
|
19
|
-
|
17
|
+
|--url|
|
18
|
+
|-p|
|
19
|
+
|-d|
|
20
|
+
|-l|
|
21
|
+
|-e|
|
22
|
+
|-u|
|
20
23
|
|
21
24
|
Scenario: Download a file
|
22
|
-
When I successfully run `pmirror -p meh -l ../foo http://localhost:55555`
|
25
|
+
When I successfully run `pmirror -p meh -l ../foo -u http://localhost:55555`
|
23
26
|
Then the exit status should be 0
|
24
27
|
And the following files should exist:
|
25
28
|
|../foo/meh.txt|
|
26
29
|
|
27
30
|
Scenario: Execute on local directory
|
28
|
-
When I successfully run `pmirror -p meh -l ../foo -e "touch test" http://localhost:55555`
|
31
|
+
When I successfully run `pmirror -p meh -l ../foo -e "touch test" -u http://localhost:55555`
|
29
32
|
Then the exit status should be 0
|
30
33
|
And the following files should exist:
|
31
34
|
|../foo/meh.txt|
|
32
35
|
|../foo/test |
|
33
36
|
|
34
37
|
Scenario: Match multiple files
|
35
|
-
When I successfully run `pmirror -p floo -l ../foo http://localhost:55555`
|
38
|
+
When I successfully run `pmirror -p floo -l ../foo -u http://localhost:55555`
|
36
39
|
Then the exit status should be 0
|
37
40
|
And the following files should exist:
|
38
41
|
| ../foo/floober.txt|
|
@@ -42,7 +45,7 @@ Feature: My bootstrapped app kinda works
|
|
42
45
|
| ../foo/meh.txt|
|
43
46
|
|
44
47
|
Scenario: Match multiple patterns
|
45
|
-
When I successfully run `pmirror -p '^floo.*','^mah.*' -l ../foo http://localhost:55555`
|
48
|
+
When I successfully run `pmirror -p '^floo.*','^mah.*' -l ../foo -u http://localhost:55555`
|
46
49
|
Then the exit status should be 0
|
47
50
|
And the following files should exist:
|
48
51
|
| ../foo/floober.txt|
|
@@ -51,5 +54,13 @@ Feature: My bootstrapped app kinda works
|
|
51
54
|
And the following files should not exist:
|
52
55
|
| ../foo/meh.txt|
|
53
56
|
|
54
|
-
|
57
|
+
Scenario: Match multiple urls
|
58
|
+
When I successfully run `pmirror -p '^floo.*','^mah.*' -l ../foo -u http://localhost:55555,http://localhost:55555`
|
59
|
+
Then the exit status should be 0
|
60
|
+
And the following files should exist:
|
61
|
+
| ../foo/floober.txt|
|
62
|
+
| ../foo/floobah.txt|
|
63
|
+
| ../foo/mah.txt|
|
64
|
+
And the following files should not exist:
|
65
|
+
| ../foo/meh.txt|
|
55
66
|
|
data/lib/pmirror.rb
CHANGED
@@ -9,15 +9,17 @@ module Pmirror
|
|
9
9
|
include Methadone::CLILogging
|
10
10
|
include Methadone::SH
|
11
11
|
|
12
|
-
main do
|
12
|
+
main do
|
13
|
+
d "Inside main"
|
13
14
|
|
14
|
-
|
15
|
+
download_list = get_download_list(options[:url], options[:pattern])
|
16
|
+
d "download_list: #{download_list.inspect}"
|
17
|
+
download_files(options[:localdir], download_list)
|
15
18
|
execute(options[:exec]) if options[:exec]
|
16
19
|
|
17
20
|
end
|
18
21
|
|
19
22
|
description "Mirror files on a remote http server based on pattern match"
|
20
|
-
arg("url", "Url or remote site", :one, :required)
|
21
23
|
on("-p", "--pattern PAT1,PAT2,PAT3", Array,
|
22
24
|
"Regex to match files in remote dir, may specify multiple patterns"
|
23
25
|
)
|
@@ -25,48 +27,66 @@ module Pmirror
|
|
25
27
|
on("-e", "--exec CMD", "Execute command after completion")
|
26
28
|
on("-d", "--debug", "Enable debugging")
|
27
29
|
on("-v", "--version", "Show version")
|
30
|
+
on("-u", "--url URL,URL", Array, "Url or remote site")
|
28
31
|
|
29
|
-
def self.
|
30
|
-
|
31
|
-
|
32
|
-
|
32
|
+
def self.d(msg)
|
33
|
+
if options[:debug]
|
34
|
+
puts "[DEBUG]: #{msg}"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.get_download_list(url_list, pattern)
|
39
|
+
d "inside get_download_list"
|
40
|
+
downloads = {}
|
41
|
+
url_list.each do |single_url|
|
42
|
+
downloads[single_url] = []
|
43
|
+
d "Getting download list for url: #{single_url}"
|
44
|
+
page = Nokogiri::HTML(open(single_url))
|
33
45
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
46
|
+
page.css("a").each do |link|
|
47
|
+
file_name = link.attributes['href'].value
|
48
|
+
pattern.each do |matcher|
|
49
|
+
if /#{matcher}/.match(file_name)
|
50
|
+
d "Found match: #{file_name}"
|
51
|
+
downloads[single_url] << file_name
|
52
|
+
end
|
39
53
|
end
|
40
54
|
end
|
55
|
+
d "Returning downloads: #{downloads.inspect}"
|
41
56
|
end
|
42
57
|
downloads
|
43
58
|
end
|
44
59
|
|
45
|
-
def self.download_files(
|
46
|
-
|
47
|
-
|
60
|
+
def self.download_files(local_dir, url_hash={})
|
61
|
+
d "Inside download_files"
|
62
|
+
url_hash.each_key do |single_url|
|
63
|
+
d "Working on #{single_url}"
|
64
|
+
url_hash[single_url].each do |file|
|
65
|
+
local_fn = "#{local_dir}/#{file}"
|
48
66
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
67
|
+
unless Dir.exist? options[:localdir]
|
68
|
+
d "PWD: #{Dir.pwd}"
|
69
|
+
puts Dir.open(Dir.pwd).read
|
70
|
+
puts "Destination directory '#{options[:localdir]}' does not exist!"
|
71
|
+
exit 1
|
72
|
+
end
|
55
73
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
74
|
+
remote_fn = "#{single_url}/#{file}"
|
75
|
+
unless File.exist?(local_fn)
|
76
|
+
puts "Downloading File: #{file}"
|
77
|
+
puts "#{remote_fn} ==> #{local_fn}"
|
78
|
+
http_to_file(local_fn, remote_fn)
|
79
|
+
# File.write(local_fn, open(remote_fn).read)
|
80
|
+
puts "Download Complete for #{file}"
|
81
|
+
else
|
82
|
+
puts "Skipping #{file}, already exists"
|
83
|
+
end
|
65
84
|
end
|
66
85
|
end
|
67
86
|
end
|
68
87
|
|
69
88
|
def self.http_to_file(filename,url)
|
89
|
+
d "Inside http_to_file"
|
70
90
|
pbar = nil
|
71
91
|
File.open(filename, 'wb') do |save_file|
|
72
92
|
open(url, 'rb',
|
@@ -84,6 +104,7 @@ module Pmirror
|
|
84
104
|
end
|
85
105
|
|
86
106
|
def self.execute(cmd)
|
107
|
+
d "Inside execute"
|
87
108
|
puts "Executing: #{cmd}"
|
88
109
|
sh("cd #{options[:localdir]} && #{cmd}")
|
89
110
|
end
|
data/lib/pmirror/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pmirror
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|