scrapers 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/scrapers/rubytapas.rb +65 -0
- data/lib/scrapers/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db591801f04aaf0af906b5b93790c8ee7521a6d1
|
4
|
+
data.tar.gz: a72274f8326f3f1ad4412b15dec1fd801d821bad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 824c3b131c1bdcd1f4cfecba7b691eb1c07110075ca884452c2476614ec8d9466214c8c42e201e8516f04d67cdb96444840fbbe4015f4fdb5752501768d4f4c5
|
7
|
+
data.tar.gz: b7c9dcb97048074027e88d75f3fcedac78066b04a1cddf9b27dea075321523c31f4224842e14c770e37bf797c3064d3995321a4baa53b3e3574d64473611d100
|
data/.gitignore
CHANGED
data/lib/scrapers/rubytapas.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module Scrapers

  module RubyTapas

    module_function

    # Save the post and attachments from an episode of RubyTapas
    # in a directory determined from the episode title.
    #
    # Example:
    #   episode url: "https://rubytapas.dpdcart.com/subscriber/post?id=443"
    #   title: "177 Aliasing | RubyTapas"
    #   subdirectory: /177-aliasing
    #
    # Parameters:
    #
    # * *url* - url of the episode to download
    # * *user* - username used to log into dpdcart
    # * *pw* - password used with username
    # * *dest* - destination directory to put episode subdirectory
    #
    # The Mechanize block evaluates to an OpenStruct carrying +title+,
    # +episode_dir+ and +attachments+ (the download links found on the page).
    # NOTE(review): the method's return value is whatever Mechanize.start
    # returns -- presumably the block's value; confirm.
    #
    # Raises RuntimeError when:
    #
    # * user or password is missing or empty
    # * url is missing or empty
    # * dest is not an existing, writable directory
    # * the page reached after logging in is not the requested url
    #
    # FileUtils.mkdir raises if the episode subdirectory already exists.
    def scrape(url=nil, user=nil, pw=nil, dest=".")
      raise "Must give user and password for RubyTapas downloads" if user.to_s.empty? || pw.to_s.empty?
      raise "Must give the url of a RubyTapas episode" if url.to_s.empty?
      dest = File.realdirpath(dest)
      raise "Destination #{dest} must be a writeable directory" unless File.directory?(dest) && File.writable?(dest)

      Mechanize.start do |m|

        tapas = OpenStruct.new

        # First time, we will get redirected to the login page
        m.get url
        login = m.current_page.form
        login.field_with(:name => "username").value = user
        login.field_with(:name => "password").value = pw
        login.submit

        # Second time, we should land on episode page
        m.get url
        # Compare string forms: the page's uri is not a String, so comparing
        # it to url directly can never be equal. Landing anywhere else
        # (e.g. back on the login page after a rejected login) is an error.
        landed = m.current_page.uri.to_s
        raise "Not where I expected. #{landed} is not #{url}" unless landed == url.to_s

        m.current_page.tap do |page|
          tapas.title = page.title.strip
          # "177 Aliasing | RubyTapas" -> "177-aliasing"
          tapas.episode_dir = File.join(dest, tapas.title.split("|").first.strip.downcase.gsub(%r{\s+}, '-'))
          tapas.attachments = page.links_with(:href => %r{\bdownload\b})
          FileUtils.mkdir(tapas.episode_dir)
          # Work inside the episode directory so each file.save lands there.
          Dir.chdir(tapas.episode_dir) do
            tapas.attachments.each do |att|
              puts "fetching #{att.text}"
              file = att.click
              puts "saving #{file.filename}"
              file.save
            end
          end
        end

        tapas

      end
    end
  end

end
|
data/lib/scrapers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tamara Temple
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -145,6 +145,7 @@ files:
|
|
145
145
|
- lib/scrapers/gocomics.rb
|
146
146
|
- lib/scrapers/imgur.rb
|
147
147
|
- lib/scrapers/nasa_apod.rb
|
148
|
+
- lib/scrapers/rubytapas.rb
|
148
149
|
- lib/scrapers/sinfest.rb
|
149
150
|
- lib/scrapers/version.rb
|
150
151
|
- lib/scrapers/xkcd.rb
|
@@ -202,3 +203,4 @@ test_files:
|
|
202
203
|
- spec/scrapers/xkcd_spec.rb
|
203
204
|
- spec/scrapers_spec.rb
|
204
205
|
- spec/spec_helper.rb
|
206
|
+
has_rdoc:
|