get_tapas 0.9.2 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +8 -0
- data/exe/get-tapas +6 -2
- data/lib/get_tapas/downloader.rb +13 -1
- data/lib/get_tapas/page_parser.rb +13 -7
- data/lib/get_tapas/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13e579fd245dac6c5c005e32e58fed5a40daf655
|
4
|
+
data.tar.gz: 403989d84bf35a73f6ebdc752ed50138dbfd2557
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8400e1777e08589b7b3666b936b310e0e016122f38d4171be507d3d8fbce877a01a97d9322944c1f026d44750fd0d9743f3cbde6ac88b3b551c39d3bf65a8c07
|
7
|
+
data.tar.gz: d68aeadc9e3ad5da7b29516ee0b0815a821feb39abf4c80d64ed2e843cc9af401ce33bf137ae2043923f9597f48e38c02f2b5903ff3e1575ad7d05609bf4eb95
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -57,3 +57,11 @@ Usage: get-tapas [options]
|
|
57
57
|
-h, --help Show this message
|
58
58
|
```
|
59
59
|
|
60
|
+
For your convenience, the output directory defaults to `$HOME/ruby-tapas`.
|
61
|
+
|
62
|
+
## Validation of the Downloaded Files
|
63
|
+
|
64
|
+
The script makes only a crude test to see that the file has been successfully downloaded
|
65
|
+
(see `Downloader#validate_downloaded_file`).
|
66
|
+
You may want to verify that yourself. If you find an error condition not already tested,
|
67
|
+
please let me know (directly or by posting an issue) and I may be able to add a test for it.
|
data/exe/get-tapas
CHANGED
@@ -30,6 +30,8 @@ end
|
|
30
30
|
|
31
31
|
def process_options
|
32
32
|
|
33
|
+
no_args_were_provided = ARGV.empty?
|
34
|
+
|
33
35
|
option_parser = nil
|
34
36
|
output_help_and_terminate = -> { puts option_parser; puts; exit(-1) }
|
35
37
|
|
@@ -78,7 +80,7 @@ def process_options
|
|
78
80
|
|
79
81
|
end.parse!
|
80
82
|
|
81
|
-
if
|
83
|
+
if no_args_were_provided
|
82
84
|
output_help_and_terminate.()
|
83
85
|
end
|
84
86
|
|
@@ -90,8 +92,10 @@ def get_html(options)
|
|
90
92
|
input = options.input_spec
|
91
93
|
if input == '-'
|
92
94
|
STDIN.read
|
93
|
-
|
95
|
+
elsif input.is_a?(String) && File.file?(input)
|
94
96
|
File.read(input)
|
97
|
+
else
|
98
|
+
raise "Invalid download list HTML input source: #{input.inspect}"
|
95
99
|
end
|
96
100
|
end
|
97
101
|
|
data/lib/get_tapas/downloader.rb
CHANGED
@@ -18,7 +18,7 @@ class Downloader
|
|
18
18
|
|
19
19
|
|
20
20
|
def ensure_output_dir_exists(dir)
|
21
|
-
return if Dir.
|
21
|
+
return if Dir.exist?(dir)
|
22
22
|
begin
|
23
23
|
FileUtils.mkdir_p(dir)
|
24
24
|
puts "Created output data directory #{dir}."
|
@@ -62,11 +62,23 @@ class Downloader
|
|
62
62
|
end
|
63
63
|
|
64
64
|
|
65
|
+
def validate_downloaded_file(filespec)
|
66
|
+
if File.size(filespec) < 20000
|
67
|
+
text = File.read(filespec)
|
68
|
+
if %r{<Error>}.match(text) && %r{</Error>}.match(text)
|
69
|
+
puts "\nDownload error, text was:\n#{text}\n\n\n"
|
70
|
+
raise "Download error"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
|
65
76
|
def download_file(link, data_dir)
|
66
77
|
puts "Downloading #{link.filespec}..."
|
67
78
|
tempfilespec = File.join(data_dir, 'tempfile')
|
68
79
|
`curl -o #{tempfilespec} #{Shellwords.shellescape(link.url)}`
|
69
80
|
if $?.exitstatus == 0
|
81
|
+
validate_downloaded_file(tempfilespec)
|
70
82
|
FileUtils.mv(tempfilespec, link.filespec)
|
71
83
|
puts "Finished downloading #{link.filename}\n\n"
|
72
84
|
else
|
@@ -7,20 +7,26 @@ module PageParser
|
|
7
7
|
|
8
8
|
# Example Input: "https://rubytapas-media.s3.amazonaws.com/298-file-find.mp4?response-content-disposition=...
|
9
9
|
# Example Return: '298-file-find.mp4'
|
10
|
-
|
10
|
+
def self.ruby_tapas_url_to_filename(url)
|
11
|
+
url.split('?').first.split('/').last
|
12
|
+
end
|
11
13
|
|
12
14
|
|
13
15
|
# @param html_string an HTML string from https://www.rubytapas.com/download-list/
|
14
16
|
# @return an array of DownloadLink instances.
|
15
|
-
def self.parse(html_string
|
17
|
+
def self.parse(html_string)
|
16
18
|
html_doc = Nokogiri::HTML(html_string)
|
17
19
|
html_links = html_doc.xpath("//*[contains(@class, 'video-download-link')]")
|
18
20
|
|
19
|
-
html_links.
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
if html_links.empty?
|
22
|
+
raise "No screencast links found. Are you sure about the input HTML source?"
|
23
|
+
else
|
24
|
+
html_links.map do |link|
|
25
|
+
url = link.children.first.attributes['href'].value
|
26
|
+
description = link.children.first.text.strip
|
27
|
+
filename = ruby_tapas_url_to_filename(url)
|
28
|
+
DownloadLink.new(url, filename, description)
|
29
|
+
end
|
24
30
|
end
|
25
31
|
end
|
26
32
|
end
|
data/lib/get_tapas/version.rb
CHANGED