scrapers 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4807185dd0cee3d545e7db022cc5d310298e8792
4
- data.tar.gz: d39450c7f4f6325eb73a864bdb476a9af3177d7a
3
+ metadata.gz: db591801f04aaf0af906b5b93790c8ee7521a6d1
4
+ data.tar.gz: a72274f8326f3f1ad4412b15dec1fd801d821bad
5
5
  SHA512:
6
- metadata.gz: 1a32d5e6a3cad4644a5a4ee34e817d650aeff240152e60a875562fc465697221676598f67d6b13707201ac014d5634afce4c1177571d2fea802c74b8295bbed4
7
- data.tar.gz: 67e51797361ad9c21dcc8d7c3200f3f3190ca98ce5402359f51aaefced4f41a34fadd8912e45480c49ce49e0baed2558570734413fa96e6f35f56f355011ed60
6
+ metadata.gz: 824c3b131c1bdcd1f4cfecba7b691eb1c07110075ca884452c2476614ec8d9466214c8c42e201e8516f04d67cdb96444840fbbe4015f4fdb5752501768d4f4c5
7
+ data.tar.gz: b7c9dcb97048074027e88d75f3fcedac78066b04a1cddf9b27dea075321523c31f4224842e14c770e37bf797c3064d3995321a4baa53b3e3574d64473611d100
data/.gitignore CHANGED
@@ -16,3 +16,4 @@ test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
18
  .rspec
19
+ .tapas
@@ -0,0 +1,65 @@
1
+ require 'fileutils'
2
+ require 'ostruct'
3
+
4
module Scrapers

  # Scraper for RubyTapas episodes; logs in through Mechanize and saves
  # the download links found on an episode page.
  module RubyTapas

    module_function

    # Save the attachments from an episode of RubyTapas in a directory
    # determined from the episode title.
    #
    # Example:
    #   episode url: "https://rubytapas.dpdcart.com/subscriber/post?id=443"
    #   title: "177 Aliasing | RubyTapas"
    #   subdirectory: /177-aliasing
    #
    # Parameters:
    #
    # * *url* - url of the episode to download
    # * *user* - username used to log into dpdcart
    # * *pw* - password used with username
    # * *dest* - destination directory to put episode subdirectory
    #
    # Returns the value of the Mechanize.start block: an OpenStruct with
    # +title+, +episode_dir+ and +attachments+ set.
    #
    # Raises RuntimeError when credentials or url are missing, when *dest*
    # is not a writable directory, or when the page reached after logging
    # in is not the requested episode url.
    def scrape(url=nil, user=nil, pw=nil, dest=".")
      raise "Must give user and password for RubyTapas downloads" if user.to_s.empty? || pw.to_s.empty?
      raise "Must give url of the RubyTapas episode to download" if url.to_s.empty?

      dest = File.realdirpath(dest)
      usable_dest = File.directory?(dest) && File.writable?(dest)
      raise "Destination #{dest} must be a writeable directory" unless usable_dest

      Mechanize.start do |m|
        tapas = OpenStruct.new

        # First time, we will get redirected to the login page
        m.get url
        login_form = m.current_page.form
        login_form.field_with(:name => "username").value = user
        login_form.field_with(:name => "password").value = pw
        login_form.submit

        # Second time, we should land on episode page.
        # Compare string forms: the page uri is an object, url is usually a
        # String, so comparing them directly would never report equality.
        m.get url
        page = m.current_page
        raise "Not where I expected. #{page.uri} is not #{url}" unless page.uri.to_s == url.to_s

        tapas.title = page.title.strip
        # "177 Aliasing | RubyTapas" -> "177-aliasing"
        episode_name = tapas.title.split("|").first.strip.downcase.gsub(%r{\s+}, '-')
        tapas.episode_dir = File.join(dest, episode_name)
        tapas.attachments = page.links_with(:href => %r{\bdownload\b})

        FileUtils.mkdir(tapas.episode_dir)
        # Attachments are saved relative to the current directory, so work
        # inside the episode directory while downloading.
        Dir.chdir(tapas.episode_dir) do
          tapas.attachments.each do |att|
            puts "fetching #{att.text}"
            file = att.click
            puts "saving #{file.filename}"
            file.save
          end
        end

        tapas
      end
    end
  end

end
@@ -2,7 +2,7 @@ module Scrapers
2
2
  module Version
3
3
 
4
4
  MAJOR = 1
5
- MINOR = 1
5
+ MINOR = 2
6
6
  BUILD = 0
7
7
 
8
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapers
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tamara Temple
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-15 00:00:00.000000000 Z
11
+ date: 2014-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -145,6 +145,7 @@ files:
145
145
  - lib/scrapers/gocomics.rb
146
146
  - lib/scrapers/imgur.rb
147
147
  - lib/scrapers/nasa_apod.rb
148
+ - lib/scrapers/rubytapas.rb
148
149
  - lib/scrapers/sinfest.rb
149
150
  - lib/scrapers/version.rb
150
151
  - lib/scrapers/xkcd.rb
@@ -202,3 +203,4 @@ test_files:
202
203
  - spec/scrapers/xkcd_spec.rb
203
204
  - spec/scrapers_spec.rb
204
205
  - spec/spec_helper.rb
206
+ has_rdoc: