scrapers 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/scrapers/rubytapas.rb +65 -0
- data/lib/scrapers/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db591801f04aaf0af906b5b93790c8ee7521a6d1
|
4
|
+
data.tar.gz: a72274f8326f3f1ad4412b15dec1fd801d821bad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 824c3b131c1bdcd1f4cfecba7b691eb1c07110075ca884452c2476614ec8d9466214c8c42e201e8516f04d67cdb96444840fbbe4015f4fdb5752501768d4f4c5
|
7
|
+
data.tar.gz: b7c9dcb97048074027e88d75f3fcedac78066b04a1cddf9b27dea075321523c31f4224842e14c770e37bf797c3064d3995321a4baa53b3e3574d64473611d100
|
data/.gitignore
CHANGED
data/lib/scrapers/rubytapas.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module Scrapers

  # Downloader for RubyTapas episodes served through dpdcart.
  module RubyTapas

    module_function

    # Save the post and attachments from an episode of RubyTapas
    # in a directory determined from the episode title.
    #
    # Example:
    #   episode url: "https://rubytapas.dpdcart.com/subscriber/post?id=443"
    #   title: "177 Aliasing | RubyTapas"
    #   subdirectory: /177-aliasing
    #
    # Parameters:
    #
    # * *url* - url of the episode to download
    # * *user* - username used to log into dpdcart
    # * *pw* - password used with username
    # * *dest* - destination directory to put episode subdirectory
    #
    # Returns an OpenStruct carrying +title+ (the stripped page title),
    # +episode_dir+ (the directory the attachments were saved in) and
    # +attachments+ (the download links found on the episode page).
    #
    # Raises RuntimeError when the user, password or url is missing, when
    # +dest+ is not a writable directory, or when the page reached after
    # logging in is not the requested episode url.
    def scrape(url=nil, user=nil, pw=nil, dest=".")
      raise "Must give user and password for RubyTapas downloads" if user.to_s.empty? || pw.to_s.empty?
      dest = File.realdirpath(dest)
      raise "Destination #{dest} must be a writeable directory" unless File.directory?(dest) && File.writable?(dest)
      # Checked last so the pre-existing argument errors above keep precedence.
      raise "Must give url of the RubyTapas episode to download" if url.to_s.empty?

      Mechanize.start do |m|

        tapas = OpenStruct.new

        # First time, we will get redirected to the login page
        m.get url
        m.current_page.form.field_with(:name => "username").value = user
        m.current_page.form.field_with(:name => "password").value = pw
        m.current_page.form.submit

        # Second time, we should land on episode page
        m.get url
        # The page uri is a URI object while url is a String, so compare
        # their string forms; comparing the objects directly never matches.
        landed_on = m.current_page.uri.to_s
        raise "Not where I expected. #{landed_on} is not #{url}" unless landed_on == url.to_s

        m.current_page.tap do |page|
          tapas.title = page.title.strip
          tapas.episode_dir = File.join(dest,tapas.title.split("|").first.strip.downcase.gsub(%r{\s+},'-'))
          tapas.attachments = page.links_with(:href => %r{\bdownload\b})
          # mkdir_p so that scraping the same episode again does not fail
          # just because its directory already exists.
          FileUtils.mkdir_p(tapas.episode_dir)
          # Attachments are saved relative to the current directory, hence
          # the chdir into the episode directory.
          Dir.chdir(tapas.episode_dir) do
            tapas.attachments.each do |att|
              puts "fetching #{att.text}"
              file = att.click
              puts "saving #{file.filename}"
              file.save
            end
          end
        end

        tapas

      end
    end
  end

end
|
data/lib/scrapers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tamara Temple
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -145,6 +145,7 @@ files:
|
|
145
145
|
- lib/scrapers/gocomics.rb
|
146
146
|
- lib/scrapers/imgur.rb
|
147
147
|
- lib/scrapers/nasa_apod.rb
|
148
|
+
- lib/scrapers/rubytapas.rb
|
148
149
|
- lib/scrapers/sinfest.rb
|
149
150
|
- lib/scrapers/version.rb
|
150
151
|
- lib/scrapers/xkcd.rb
|
@@ -202,3 +203,4 @@ test_files:
|
|
202
203
|
- spec/scrapers/xkcd_spec.rb
|
203
204
|
- spec/scrapers_spec.rb
|
204
205
|
- spec/spec_helper.rb
|
206
|
+
has_rdoc:
|