scrapers 1.5.1 → 1.5.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: eb80fb9fb6be0a4c94bba33715ea5bafef20f941
-  data.tar.gz: 066bec449349a457301154834289f3db74e257fe
+  metadata.gz: 86724e63b99e28cc9e82a1c6806dce93515f9e3e
+  data.tar.gz: 7f1789114ac38d02a006e99bfb00294fa8c3c5b1
 SHA512:
-  metadata.gz: 1683f98572977ddbff0d92252f58ee53966a8cef7b362374ef5e04194771718cc4b84156dfac740f86e113f4cff7c112bdc1a4b1ab872e0bf488020cedf72d97
-  data.tar.gz: cab26c4fd6438c5743dad7dfbcd7cd14d005c5f43d8fef40fc7cd4f47efabfca6d61543aa0a433cd9f8b585c26b4ab4d732927fcb251b47abd103b6ef6564f74
+  metadata.gz: c60d23ca256369982c57e8ad8099403042f411d9989694b3f9d32e65fa1307ba6452bf64d93b895059a457c44916aa2c90149903e075bdb76e0fe33fe640c0e5
+  data.tar.gz: 692ca67d599d2e846dc191827c5f77199f4c2de5d5b5415243995569f153681b80c72a72d910716af92d10c76c87775e705b3d0bc5b76191382092196ace3a70
data/bin/rubytapas CHANGED
@@ -41,6 +41,35 @@ class RubyTapasDownload < Thor
     Scrapers::RubyTapas.scrape url, user, pw, destination
   end
 
+  desc "all", "Download all rubytapas episodes"
+  method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination of download", :default => '.'
+  method_option :url, :desc => "url of showlist", :default => 'https://rubytapas.dpdcart.com/subscriber/content'
+  method_option :user, :aliases => %w{-u -U}, :desc => "dpdcart user. Default is read from $HOME/.netrc"
+  method_option :password, :aliases => %w{-p -pw}, :desc => "dpdcart password. Default is read from $HOME/.netrc"
+
+  def all
+    STDERR.puts options.inspect
+
+    netrc = Netrc.read
+    user, pw = netrc[RUBYTAPAS]
+    user = options.fetch("user", user)
+    pw = options.fetch("password", pw)
+    url = options.fetch("url", nil)
+    destination = options.fetch("destination", nil)
+    STDERR.puts "destination: #{destination}, url: #{url}, user: #{user}, pw: #{pw.length}"
+    raise "Must give url" unless url
+
+    showlist_urls = Scrapers::RubyTapas.showlist(url, user, pw)
+
+    showlist_urls.each do |url|
+      Scrapers::RubyTapas.scrape url, user, pw, destination
+      print "pausing..."
+      sleep 5
+      puts "."
+    end
+
+  end
+
 end
 
 RubyTapasDownload.start
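
For reference, the new "all" task is just a loop over Scrapers::RubyTapas.showlist plus the existing per-episode scrape call. A minimal sketch of the same flow driven directly from Ruby (the .netrc machine name and the ./tapas destination are illustrative assumptions, not gem defaults):

    require 'scrapers'
    require 'netrc'

    url = 'https://rubytapas.dpdcart.com/subscriber/content'
    user, pw = Netrc.read['rubytapas.dpdcart.com']   # assumes a .netrc entry for the dpdcart host

    Scrapers::RubyTapas.showlist(url, user, pw).each do |episode_url|
      Scrapers::RubyTapas.scrape episode_url, user, pw, './tapas'
      sleep 5   # same politeness pause the "all" task uses between episodes
    end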
data/lib/scrapers.rb CHANGED
@@ -1,9 +1,16 @@
 require 'mechanize'
-
+require 'uri'
 Dir[File.join(File.expand_path('../', __FILE__),'**','*.rb')].each {|file| require file}
 
 module Scrapers
   def self.agent()
     @agent ||= Mechanize.new
   end
+
+  def self.base(url)
+    u = URI.parse(url)
+    u.path=''
+    u.to_s
+  end
+
 end
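
The new Scrapers.base helper simply drops the path component of a URL and returns the scheme plus host via URI. A quick illustration (the URL is only an example):

    require 'scrapers'

    Scrapers.base('https://rubytapas.dpdcart.com/subscriber/content')
    # => "https://rubytapas.dpdcart.com"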
@@ -1,6 +1,7 @@
 require 'fileutils'
 require 'ostruct'
 require 'mechanize'
+require 'uri'
 
 module Scrapers
 
@@ -32,20 +33,13 @@ module Scrapers
 
       tapas = OpenStruct.new
 
-      # First time, we will get redirected to the login page
-      m.get url
-      m.current_page.form.field_with(:name => "username").value = user
-      m.current_page.form.field_with(:name => "password").value = pw
-      m.current_page.form.submit
-
-      # Second time, we should land on episode page
-      m.get url
-      raise "Not where I expected. #{m.current_page.uri} is not #{url}" unless m.current_page.uri != url
+      m = self.login(m, url, user, pw)
 
       m.current_page.tap do |page|
         tapas.title = page.title.strip
         tapas.episode_dir = File.join(dest,tapas.title.split("|").first.strip.downcase.gsub(%r{\s+},'-'))
         tapas.attachments = page.links_with(:href => %r{\bdownload\b})
+        puts "Fetching and saving #{tapas.title} into #{tapas.episode_dir}"
         FileUtils.mkdir(tapas.episode_dir)
         Dir.chdir(tapas.episode_dir) do |dir|
           tapas.attachments.each do |att|
@@ -61,6 +55,34 @@ module Scrapers
 
       end
     end
+
+    # retrieve a list of URLs for shows from the showlist
+    def self.showlist(showlist_url, user=nil, pw=nil)
+      raise "Must give showlist url, user, and password" if showlist_url.to_s.empty? || user.to_s.empty? || pw.to_s.empty?
+
+      Mechanize.start do |m|
+        m = self.login(m, showlist_url, user, pw)
+        links = m.current_page.links_with(:text => "Read More")
+        s = URI.parse(showlist_url)
+        s.path = ''
+        links.map{|l| "#{s}#{l.href}" }
+      end
+
+
+    end
+
+    def self.login(m, url, user, pw)
+      # First time, we will get redirected to the login page
+      m.get url
+      m.current_page.form.field_with(:name => "username").value = user
+      m.current_page.form.field_with(:name => "password").value = pw
+      m.current_page.form.submit
+
+      # Second time, we should land on episode page
+      m.get url
+      raise "Not where I expected. #{m.current_page.uri} is not #{url}" unless m.current_page.uri != url
+      m
+    end
+
   end
-
 end
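
The new showlist method logs in, collects the "Read More" links from the subscriber content page, and prefixes each relative href with the scheme and host of the showlist URL. A sketch of that URL assembly (the href below is hypothetical; real dpdcart hrefs will differ):

    require 'uri'

    showlist_url = 'https://rubytapas.dpdcart.com/subscriber/content'
    href = '/subscriber/post?post_id=123'   # hypothetical "Read More" href

    s = URI.parse(showlist_url)
    s.path = ''                             # keep only scheme and host
    "#{s}#{href}"                           # => "https://rubytapas.dpdcart.com/subscriber/post?post_id=123"

This is the same path-stripping that the new Scrapers.base helper in data/lib/scrapers.rb performs, so showlist could delegate to it.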
@@ -3,7 +3,7 @@ module Scrapers
 
   MAJOR = 1
   MINOR = 5
-  BUILD = 1
+  BUILD = 2
 
 end
 
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: scrapers
 version: !ruby/object:Gem::Version
-  version: 1.5.1
+  version: 1.5.2
 platform: ruby
 authors:
 - Tamara Temple
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-02-21 00:00:00.000000000 Z
+date: 2014-02-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize