scrapers 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4807185dd0cee3d545e7db022cc5d310298e8792
4
- data.tar.gz: d39450c7f4f6325eb73a864bdb476a9af3177d7a
3
+ metadata.gz: db591801f04aaf0af906b5b93790c8ee7521a6d1
4
+ data.tar.gz: a72274f8326f3f1ad4412b15dec1fd801d821bad
5
5
  SHA512:
6
- metadata.gz: 1a32d5e6a3cad4644a5a4ee34e817d650aeff240152e60a875562fc465697221676598f67d6b13707201ac014d5634afce4c1177571d2fea802c74b8295bbed4
7
- data.tar.gz: 67e51797361ad9c21dcc8d7c3200f3f3190ca98ce5402359f51aaefced4f41a34fadd8912e45480c49ce49e0baed2558570734413fa96e6f35f56f355011ed60
6
+ metadata.gz: 824c3b131c1bdcd1f4cfecba7b691eb1c07110075ca884452c2476614ec8d9466214c8c42e201e8516f04d67cdb96444840fbbe4015f4fdb5752501768d4f4c5
7
+ data.tar.gz: b7c9dcb97048074027e88d75f3fcedac78066b04a1cddf9b27dea075321523c31f4224842e14c770e37bf797c3064d3995321a4baa53b3e3574d64473611d100
data/.gitignore CHANGED
@@ -16,3 +16,4 @@ test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
18
  .rspec
19
+ .tapas
@@ -0,0 +1,65 @@
1
+ require 'fileutils'
2
+ require 'ostruct'
3
+
4
module Scrapers

  module RubyTapas

    module_function

    # Save the post and attachments from an episode of RubyTapas
    # in a directory determined from the episode title.
    #
    # Example:
    #   episode url: "https://rubytapas.dpdcart.com/subscriber/post?id=443"
    #   title: "177 Aliasing | RubyTapas"
    #   subdirectory: /177-aliasing
    #
    # Parameters:
    #
    # * *url* - url of the episode to download
    # * *user* - username used to log into dpdcart
    # * *pw* - password used with username
    # * *dest* - destination directory to put episode subdirectory
    #
    # The last expression of the Mechanize session block is an OpenStruct
    # carrying +title+, +episode_dir+ and +attachments+ (the download links
    # found on the episode page).
    #
    # Raises RuntimeError when the url, user or password is missing, when
    # *dest* is not a writeable directory, or when the second request does
    # not end up on the requested episode url.
    #
    def scrape(url=nil, user=nil, pw=nil, dest=".")
      raise "Must give user and password for RubyTapas downloads" if user.to_s.empty? || pw.to_s.empty?
      # Fail early with a clear message instead of deep inside the HTTP client.
      raise "Must give the url of the RubyTapas episode to download" if url.to_s.empty?
      dest = File.realdirpath(dest)
      raise "Destination #{dest} must be a writeable directory" unless File.directory?(dest) && File.writable?(dest)

      Mechanize.start do |m|

        tapas = OpenStruct.new

        # First time, we will get redirected to the login page
        m.get url
        m.current_page.form.field_with(:name => "username").value = user
        m.current_page.form.field_with(:name => "password").value = pw
        m.current_page.form.submit

        # Second time, we should land on episode page
        m.get url
        # The page uri is a URI object while url is a String, so compare
        # their string forms; a direct comparison can never be equal.
        unless m.current_page.uri.to_s == url.to_s
          raise "Not where I expected. #{m.current_page.uri} is not #{url}"
        end

        m.current_page.tap do |page|
          tapas.title = page.title.strip
          # "177 Aliasing | RubyTapas" -> "177-aliasing"
          tapas.episode_dir = File.join(dest, tapas.title.split("|").first.strip.downcase.gsub(%r{\s+}, '-'))
          tapas.attachments = page.links_with(:href => %r{\bdownload\b})
          FileUtils.mkdir(tapas.episode_dir)
          # Save every attachment from inside the episode directory.
          Dir.chdir(tapas.episode_dir) do
            tapas.attachments.each do |att|
              puts "fetching #{att.text}"
              file = att.click
              puts "saving #{file.filename}"
              file.save
            end
          end
        end

        tapas

      end
    end
  end

end
@@ -2,7 +2,7 @@ module Scrapers
2
2
  module Version
3
3
 
4
4
  MAJOR = 1
5
- MINOR = 1
5
+ MINOR = 2
6
6
  BUILD = 0
7
7
 
8
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapers
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tamara Temple
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-15 00:00:00.000000000 Z
11
+ date: 2014-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -145,6 +145,7 @@ files:
145
145
  - lib/scrapers/gocomics.rb
146
146
  - lib/scrapers/imgur.rb
147
147
  - lib/scrapers/nasa_apod.rb
148
+ - lib/scrapers/rubytapas.rb
148
149
  - lib/scrapers/sinfest.rb
149
150
  - lib/scrapers/version.rb
150
151
  - lib/scrapers/xkcd.rb
@@ -202,3 +203,4 @@ test_files:
202
203
  - spec/scrapers/xkcd_spec.rb
203
204
  - spec/scrapers_spec.rb
204
205
  - spec/spec_helper.rb
206
+ has_rdoc: