scrapers 1.5.1 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eb80fb9fb6be0a4c94bba33715ea5bafef20f941
4
- data.tar.gz: 066bec449349a457301154834289f3db74e257fe
3
+ metadata.gz: 86724e63b99e28cc9e82a1c6806dce93515f9e3e
4
+ data.tar.gz: 7f1789114ac38d02a006e99bfb00294fa8c3c5b1
5
5
  SHA512:
6
- metadata.gz: 1683f98572977ddbff0d92252f58ee53966a8cef7b362374ef5e04194771718cc4b84156dfac740f86e113f4cff7c112bdc1a4b1ab872e0bf488020cedf72d97
7
- data.tar.gz: cab26c4fd6438c5743dad7dfbcd7cd14d005c5f43d8fef40fc7cd4f47efabfca6d61543aa0a433cd9f8b585c26b4ab4d732927fcb251b47abd103b6ef6564f74
6
+ metadata.gz: c60d23ca256369982c57e8ad8099403042f411d9989694b3f9d32e65fa1307ba6452bf64d93b895059a457c44916aa2c90149903e075bdb76e0fe33fe640c0e5
7
+ data.tar.gz: 692ca67d599d2e846dc191827c5f77199f4c2de5d5b5415243995569f153681b80c72a72d910716af92d10c76c87775e705b3d0bc5b76191382092196ace3a70
data/bin/rubytapas CHANGED
@@ -41,6 +41,35 @@ class RubyTapasDownload < Thor
41
41
  Scrapers::RubyTapas.scrape url, user, pw, destination
42
42
  end
43
43
 
44
desc "all", "Download all rubytapas episodes"
method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination of dowload", :default => '.'
method_option :url, :desc => "url of showlist", :default => 'https://rubytapas.dpdcart.com/subscriber/content'
method_option :user, :aliases => %w{-u -U}, :desc => "dpdcart user. Default is read from $HOME/.netrc"
method_option :password, :aliases => %w{-p -pw}, :desc => "dpdcart password. Default is read from $HOME/.netrc"

# Thor command: fetch the showlist page, then scrape every episode it links to.
#
# Credentials come from the Netrc entry looked up with RUBYTAPAS (a constant
# defined elsewhere in this script) and can be overridden by --user/--password.
# Pauses five seconds after each episode.
#
# Raises RuntimeError ("Must give url") when no showlist url is available.
def all
  # Debug output must never contain the password itself, so drop that option
  # before inspecting; the line below reports only its length.
  STDERR.puts options.reject { |name, _| name.to_s == "password" }.inspect

  # Netrc returns nil for an unknown host; destructuring nil leaves both unset.
  user, pw = Netrc.read[RUBYTAPAS]
  user = options.fetch("user", user)
  pw = options.fetch("password", pw)
  url = options.fetch("url", nil)
  destination = options.fetch("destination", nil)
  # pw may still be nil here (no netrc entry, no --password): to_s keeps this
  # diagnostic from crashing before the scraper can report the real problem.
  STDERR.puts "destination: #{destination}, url: #{url}, user: #{user}, pw: #{pw.to_s.length}"
  raise "Must give url" unless url

  showlist_urls = Scrapers::RubyTapas.showlist(url, user, pw)

  # Named episode_url so the block parameter does not shadow the showlist url.
  showlist_urls.each do |episode_url|
    Scrapers::RubyTapas.scrape episode_url, user, pw, destination
    print "pausing..."
    sleep 5
    puts "."
  end
end
72
+
44
73
  end
45
74
 
46
75
  RubyTapasDownload.start
data/lib/scrapers.rb CHANGED
@@ -1,9 +1,16 @@
1
1
  require 'mechanize'
2
-
2
+ require 'uri'
3
3
  Dir[File.join(File.expand_path('../', __FILE__),'**','*.rb')].each {|file| require file}
4
4
 
5
5
  module Scrapers
6
6
  def self.agent()
7
7
  @agent ||= Mechanize.new
8
8
  end
9
+
10
+ def self.base(url)
11
+ u = URI.parse(url)
12
+ u.path=''
13
+ u.to_s
14
+ end
15
+
9
16
  end
@@ -1,6 +1,7 @@
1
1
  require 'fileutils'
2
2
  require 'ostruct'
3
3
  require 'mechanize'
4
+ require 'uri'
4
5
 
5
6
  module Scrapers
6
7
 
@@ -32,20 +33,13 @@ module Scrapers
32
33
 
33
34
  tapas = OpenStruct.new
34
35
 
35
- # First time, we will get redirected to the login page
36
- m.get url
37
- m.current_page.form.field_with(:name => "username").value = user
38
- m.current_page.form.field_with(:name => "password").value = pw
39
- m.current_page.form.submit
40
-
41
- # Second time, we should land on episode page
42
- m.get url
43
- raise "Not where I expected. #{m.current_page.uri} is not #{url}" unless m.current_page.uri != url
36
+ m = self.login(m, url, user, pw)
44
37
 
45
38
  m.current_page.tap do |page|
46
39
  tapas.title = page.title.strip
47
40
  tapas.episode_dir = File.join(dest,tapas.title.split("|").first.strip.downcase.gsub(%r{\s+},'-'))
48
41
  tapas.attachments = page.links_with(:href => %r{\bdownload\b})
42
+ puts "Fetching and saving #{tapas.title} into #{tapas.episode_dir}"
49
43
  FileUtils.mkdir(tapas.episode_dir)
50
44
  Dir.chdir(tapas.episode_dir) do |dir|
51
45
  tapas.attachments.each do |att|
@@ -61,6 +55,34 @@ module Scrapers
61
55
 
62
56
  end
63
57
  end
58
+
59
# Retrieve the list of episode URLs linked from the showlist page.
#
# showlist_url - URL of the showlist page.
# user, pw     - credentials handed to login.
#
# Returns an Array of absolute URL Strings, one per "Read More" link that
# has an href.
# Raises RuntimeError when the url, user or password is missing or empty.
def self.showlist(showlist_url, user=nil, pw=nil)
  raise "Must give showlist url, user, and password" if showlist_url.to_s.empty? || user.to_s.empty? || pw.to_s.empty?

  Mechanize.start do |m|
    m = self.login(m, showlist_url, user, pw)
    hrefs = m.current_page.links_with(:text => "Read More").map(&:href).compact
    # URI.join resolves each href against the showlist URL, so root-relative,
    # relative and already-absolute links all come out as valid absolute
    # URLs, and a query string on showlist_url cannot leak into the result.
    hrefs.map { |href| URI.join(showlist_url.to_s, href).to_s }
  end
end
73
+
74
# Log the given agent in and leave it positioned on url.
#
# m    - a Mechanize agent (anything responding to #get and #current_page).
# url  - the page we ultimately want to be on.
# user - value for the login form's "username" field.
# pw   - value for the login form's "password" field.
#
# Returns m, with url as its current page.
# Raises RuntimeError when, after logging in, the agent does not end up on url.
def self.login(m, url, user, pw)
  # First time, we will get redirected to the login page
  m.get url
  form = m.current_page.form
  username = form && form.field_with(:name => "username")
  password = form && form.field_with(:name => "password")
  # Only submit when this really is the login form; a page without one
  # falls through to the landing check below instead of crashing on nil.
  if username && password
    username.value = user
    password.value = pw
    form.submit
  end

  # Second time, we should land on the requested page
  m.get url
  landed = m.current_page.uri
  # Compare URI with URI: a URI never equals a String, so comparing the page
  # uri with the raw url argument could not detect a failed login.
  raise "Not where I expected. #{landed} is not #{url}" unless URI.parse(landed.to_s) == URI.parse(url.to_s)
  m
end
86
+
64
87
  end
65
-
66
88
  end
@@ -3,7 +3,7 @@ module Scrapers
3
3
 
4
4
  MAJOR = 1
5
5
  MINOR = 5
6
- BUILD = 1
6
+ BUILD = 2
7
7
 
8
8
  end
9
9
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapers
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tamara Temple
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-21 00:00:00.000000000 Z
11
+ date: 2014-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize