scrapers 1.5.1 → 1.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rubytapas +29 -0
- data/lib/scrapers.rb +8 -1
- data/lib/scrapers/rubytapas.rb +32 -10
- data/lib/scrapers/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86724e63b99e28cc9e82a1c6806dce93515f9e3e
|
4
|
+
data.tar.gz: 7f1789114ac38d02a006e99bfb00294fa8c3c5b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c60d23ca256369982c57e8ad8099403042f411d9989694b3f9d32e65fa1307ba6452bf64d93b895059a457c44916aa2c90149903e075bdb76e0fe33fe640c0e5
|
7
|
+
data.tar.gz: 692ca67d599d2e846dc191827c5f77199f4c2de5d5b5415243995569f153681b80c72a72d910716af92d10c76c87775e705b3d0bc5b76191382092196ace3a70
|
data/bin/rubytapas
CHANGED
@@ -41,6 +41,35 @@ class RubyTapasDownload < Thor
|
|
41
41
|
Scrapers::RubyTapas.scrape url, user, pw, destination
|
42
42
|
end
|
43
43
|
|
44
|
+
desc "all", "Download all rubytapas episodes"
|
45
|
+
method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination of dowload", :default => '.'
|
46
|
+
method_option :url, :desc => "url of showlist", :default => 'https://rubytapas.dpdcart.com/subscriber/content'
|
47
|
+
method_option :user, :aliases => %w{-u -U}, :desc => "dpdcart user. Default is read from $HOME/.netrc"
|
48
|
+
method_option :password, :aliases => %w{-p -pw}, :desc => "dpdcart password. Default is read from $HOME/.netrc"
|
49
|
+
|
50
|
+
def all
|
51
|
+
STDERR.puts options.inspect
|
52
|
+
|
53
|
+
netrc = Netrc.read
|
54
|
+
user, pw = netrc[RUBYTAPAS]
|
55
|
+
user = options.fetch("user", user)
|
56
|
+
pw = options.fetch("password", pw)
|
57
|
+
url = options.fetch("url", nil)
|
58
|
+
destination = options.fetch("destination", nil)
|
59
|
+
STDERR.puts "destination: #{destination}, url: #{url}, user: #{user}, pw: #{pw.length}"
|
60
|
+
raise "Must give url" unless url
|
61
|
+
|
62
|
+
showlist_urls = Scrapers::RubyTapas.showlist(url, user, pw)
|
63
|
+
|
64
|
+
showlist_urls.each do |url|
|
65
|
+
Scrapers::RubyTapas.scrape url, user, pw, destination
|
66
|
+
print "pausing..."
|
67
|
+
sleep 5
|
68
|
+
puts "."
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
|
44
73
|
end
|
45
74
|
|
46
75
|
RubyTapasDownload.start
|
data/lib/scrapers.rb
CHANGED
@@ -1,9 +1,16 @@
|
|
1
1
|
require 'mechanize'
|
2
|
-
|
2
|
+
require 'uri'
|
3
3
|
Dir[File.join(File.expand_path('../', __FILE__),'**','*.rb')].each {|file| require file}
|
4
4
|
|
5
5
|
module Scrapers
|
6
6
|
def self.agent()
|
7
7
|
@agent ||= Mechanize.new
|
8
8
|
end
|
9
|
+
|
10
|
+
def self.base(url)
|
11
|
+
u = URI.parse(url)
|
12
|
+
u.path=''
|
13
|
+
u.to_s
|
14
|
+
end
|
15
|
+
|
9
16
|
end
|
data/lib/scrapers/rubytapas.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'fileutils'
|
2
2
|
require 'ostruct'
|
3
3
|
require 'mechanize'
|
4
|
+
require 'uri'
|
4
5
|
|
5
6
|
module Scrapers
|
6
7
|
|
@@ -32,20 +33,13 @@ module Scrapers
|
|
32
33
|
|
33
34
|
tapas = OpenStruct.new
|
34
35
|
|
35
|
-
|
36
|
-
m.get url
|
37
|
-
m.current_page.form.field_with(:name => "username").value = user
|
38
|
-
m.current_page.form.field_with(:name => "password").value = pw
|
39
|
-
m.current_page.form.submit
|
40
|
-
|
41
|
-
# Second time, we should land on episode page
|
42
|
-
m.get url
|
43
|
-
raise "Not where I expected. #{m.current_page.uri} is not #{url}" unless m.current_page.uri != url
|
36
|
+
m = self.login(m, url, user, pw)
|
44
37
|
|
45
38
|
m.current_page.tap do |page|
|
46
39
|
tapas.title = page.title.strip
|
47
40
|
tapas.episode_dir = File.join(dest,tapas.title.split("|").first.strip.downcase.gsub(%r{\s+},'-'))
|
48
41
|
tapas.attachments = page.links_with(:href => %r{\bdownload\b})
|
42
|
+
puts "Fetching and saving #{tapas.title} into #{tapas.episode_dir}"
|
49
43
|
FileUtils.mkdir(tapas.episode_dir)
|
50
44
|
Dir.chdir(tapas.episode_dir) do |dir|
|
51
45
|
tapas.attachments.each do |att|
|
@@ -61,6 +55,34 @@ module Scrapers
|
|
61
55
|
|
62
56
|
end
|
63
57
|
end
|
58
|
+
|
59
|
+
# retrieve a list of URLs for shows from the showlist
|
60
|
+
def self.showlist(showlist_url, user=nil, pw=nil)
|
61
|
+
raise "Must give showlist url, user, and password" if showlist_url.to_s.empty? || user.to_s.empty? || pw.to_s.empty?
|
62
|
+
|
63
|
+
Mechanize.start do |m|
|
64
|
+
m = self.login(m, showlist_url, user, pw)
|
65
|
+
links = m.current_page.links_with(:text => "Read More")
|
66
|
+
s = URI.parse(showlist_url)
|
67
|
+
s.path = ''
|
68
|
+
links.map{|l| "#{s}#{l.href}" }
|
69
|
+
end
|
70
|
+
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.login(m, url, user, pw)
|
75
|
+
# First time, we will get redirected to the login page
|
76
|
+
m.get url
|
77
|
+
m.current_page.form.field_with(:name => "username").value = user
|
78
|
+
m.current_page.form.field_with(:name => "password").value = pw
|
79
|
+
m.current_page.form.submit
|
80
|
+
|
81
|
+
# Second time, we should land on episode page
|
82
|
+
m.get url
|
83
|
+
raise "Not where I expected. #{m.current_page.uri} is not #{url}" unless m.current_page.uri != url
|
84
|
+
m
|
85
|
+
end
|
86
|
+
|
64
87
|
end
|
65
|
-
|
66
88
|
end
|
data/lib/scrapers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.1
|
4
|
+
version: 1.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tamara Temple
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|