scrapers 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +0 -43
- data/bin/manning_books +43 -0
- data/lib/scrapers/manning_books.rb +55 -0
- data/lib/scrapers/version.rb +1 -1
- data/spec/scrapers/manning_books_spec.rb +20 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 003f7ffd1d5c6ccc6df7470045cf46b28fc5256e
|
4
|
+
data.tar.gz: 621f3354ac4af8a765495973ba7e204cde20b570
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b5c40acbe36d4b90d0f06db30f79acdae8c4ab173c6f572f0a7a0bb96400560038187cdbaf8030ee1bec801d822ac9ec7e0ff4cbc5937839f6853a26173fc68
|
7
|
+
data.tar.gz: 35e373c156d79a39f7661453475b5a3cfcfbac855c0d09bdb0a0fa913cae89fc047e29605e98d3189a46672a64b9781e76e972f024e9b2d0bc287dca45b4e5f2
|
data/Rakefile
CHANGED
@@ -2,46 +2,3 @@ require "bundler/gem_tasks"
|
|
2
2
|
require "highline/import"
|
3
3
|
require "active_support/core_ext/string/inflections"
|
4
4
|
|
5
|
-
desc "Create the basis for a new scraper"
|
6
|
-
task :new, [:module_name] do |t, args|
|
7
|
-
if args.module_name
|
8
|
-
module_name = args.module_name
|
9
|
-
else
|
10
|
-
module_name = ask("<%= color('What is the name of your new scraper module?', YELLOW) %>") {|q| q.default = "NewScraper" }
|
11
|
-
end
|
12
|
-
|
13
|
-
file_name = module_name.underscore + ".rb"
|
14
|
-
dir_name = File.join(File.dirname(__FILE__),'lib','scrapers')
|
15
|
-
|
16
|
-
new_scraper_path = File.join(dir_name, file_name)
|
17
|
-
|
18
|
-
template = <<-EOT
|
19
|
-
module Scrapers
|
20
|
-
module #{module_name}
|
21
|
-
|
22
|
-
def self.scrape(url)
|
23
|
-
results = Hash.new
|
24
|
-
|
25
|
-
Mechanize.start(url) do |m|
|
26
|
-
|
27
|
-
end
|
28
|
-
|
29
|
-
results
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
33
|
-
end
|
34
|
-
EOT
|
35
|
-
|
36
|
-
if File.exist?(new_scraper_path)
|
37
|
-
if agree("<%= color('#{file_name}', BLUE); color('already exists. Do you want to overwrite it?', YELLOW) %>", true)
|
38
|
-
File.unlink(new_scraper_path)
|
39
|
-
else
|
40
|
-
exit 0
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
File.write(new_scraper_path, template)
|
45
|
-
say("<%= color('New scraper in', YELLOW) %> <%= color('#{new_scraper_path}', BLUE) %>")
|
46
|
-
|
47
|
-
end
|
data/bin/manning_books
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'thor'
|
5
|
+
require 'netrc'
|
6
|
+
require 'scrapers/manning_books'
|
7
|
+
require 'awesome_print'
|
8
|
+
require 'pry'
|
9
|
+
|
10
|
+
################################################################################
|
11
|
+
#
|
12
|
+
# Scraper for Manning Books I've purchased
|
13
|
+
#
|
14
|
+
# (Note: you need to log into Manning and have acutally purchased e-books.
|
15
|
+
# They will appear on your dashboard.)
|
16
|
+
#
|
17
|
+
################################################################################
|
18
|
+
|
19
|
+
class ManningBooks < Thor
|
20
|
+
|
21
|
+
MANNING = 'manning'
|
22
|
+
|
23
|
+
desc "download", "Downloads all the editions of all the books on your dashboard"
|
24
|
+
method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination to store the downloads", :default => "."
|
25
|
+
method_option :user, :aliases => %w{-u -U}, :desc => "Manning user. Default is read from $HOME/.netrc"
|
26
|
+
method_option :password, :aliases => %w{-p -pw}, :desc => "Manning password. Default is read from $HOME/.netrc"
|
27
|
+
|
28
|
+
def download
|
29
|
+
netrc = Netrc.read
|
30
|
+
user, pw = netrc[MANNING]
|
31
|
+
user = options.fetch("user", user)
|
32
|
+
pw = options.fetch("password", pw)
|
33
|
+
destination = options.fetch("destination", nil)
|
34
|
+
STDERR.puts "destination: #{destination}, user: #{user}, pw: #{pw.length}"
|
35
|
+
binding.pry
|
36
|
+
Signal.trap('INT', proc { STDERR.puts "Download Interrupted"; exit(-1)})
|
37
|
+
results = Scrapers::ManningBooks.scrape destination, user, pw
|
38
|
+
ap results
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
ManningBooks.start
|
data/lib/scrapers/manning_books.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
require 'mechanize'
|
3
|
+
require 'pry'
|
4
|
+
|
5
|
+
module Scrapers
|
6
|
+
module ManningBooks
|
7
|
+
|
8
|
+
DASHBOARD_URL = "https://account.manning.com/dashboard"
|
9
|
+
|
10
|
+
def self.scrape(dest=".", user=nil, pw=nil)
|
11
|
+
results = Array.new
|
12
|
+
|
13
|
+
Mechanize.start do |m|
|
14
|
+
m.get DASHBOARD_URL
|
15
|
+
unless m.current_page.uri == DASHBOARD_URL
|
16
|
+
# log in
|
17
|
+
m.current_page.form.field_with(:type => 'email').value= user
|
18
|
+
m.current_page.form.field_with(:type => 'password').value= pw
|
19
|
+
m.current_page.form.submit
|
20
|
+
sleep 2
|
21
|
+
raise "could not log in" unless m.current_page.uri.to_s == DASHBOARD_URL
|
22
|
+
end
|
23
|
+
|
24
|
+
book_downloads = m.current_page.links_with(:href => %r{/account/bookProduct/download})
|
25
|
+
|
26
|
+
Dir.chdir(dest) do |dir|
|
27
|
+
book_downloads.each do |book|
|
28
|
+
puts "Downloading #{book.href}"
|
29
|
+
m.get book.href
|
30
|
+
results << [m.current_page.filename, m.current_page.uri.to_s]
|
31
|
+
puts "Saving #{m.current_page.filename}"
|
32
|
+
m.current_page.save! # overwrite!
|
33
|
+
|
34
|
+
wait_a_bit 5
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
Hash[results]
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.wait_a_bit(delay)
|
45
|
+
puts "delaying for #{delay} second(s)"
|
46
|
+
%w[- \ | /].cycle(delay) do |c|
|
47
|
+
print "\r#{c}"
|
48
|
+
sleep 1
|
49
|
+
end
|
50
|
+
print "\r"
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
data/lib/scrapers/version.rb
CHANGED
data/spec/scrapers/manning_books_spec.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'scrapers/manning_books'
|
4
|
+
|
5
|
+
module Scrapers
|
6
|
+
|
7
|
+
describe ManningBooks do
|
8
|
+
it{should respond_to :scrape}
|
9
|
+
context "scraping" do
|
10
|
+
before(:all) do
|
11
|
+
@comic = VCR.use_cassette('manning_books') do
|
12
|
+
@result = Scrapers::ManningBooks.scrape
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
it {expect(@result).to_not be_nil}
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tamara Temple
|
@@ -169,6 +169,7 @@ description: A library of web site scrapers utilizing mechanize and other goodie
|
|
169
169
|
email:
|
170
170
|
- tamouse@gmail.com
|
171
171
|
executables:
|
172
|
+
- manning_books
|
172
173
|
- new_scraper
|
173
174
|
- rubytapas
|
174
175
|
extensions: []
|
@@ -181,6 +182,7 @@ files:
|
|
181
182
|
- LICENSE.txt
|
182
183
|
- README.md
|
183
184
|
- Rakefile
|
185
|
+
- bin/manning_books
|
184
186
|
- bin/new_scraper
|
185
187
|
- bin/rubytapas
|
186
188
|
- lib/scrapers.rb
|
@@ -190,6 +192,7 @@ files:
|
|
190
192
|
- lib/scrapers/esod.rb
|
191
193
|
- lib/scrapers/gocomics.rb
|
192
194
|
- lib/scrapers/imgur.rb
|
195
|
+
- lib/scrapers/manning_books.rb
|
193
196
|
- lib/scrapers/manning_dashboard.rb
|
194
197
|
- lib/scrapers/nasa_apod.rb
|
195
198
|
- lib/scrapers/rubytapas.rb
|
@@ -202,6 +205,7 @@ files:
|
|
202
205
|
- spec/scrapers/download_spec.rb
|
203
206
|
- spec/scrapers/gocomics_spec.rb
|
204
207
|
- spec/scrapers/imgur_spec.rb
|
208
|
+
- spec/scrapers/manning_books_spec.rb
|
205
209
|
- spec/scrapers/nasa_apod_spec.rb
|
206
210
|
- spec/scrapers/sinfest_spec.rb
|
207
211
|
- spec/scrapers/xkcd_spec.rb
|
@@ -247,9 +251,9 @@ test_files:
|
|
247
251
|
- spec/scrapers/download_spec.rb
|
248
252
|
- spec/scrapers/gocomics_spec.rb
|
249
253
|
- spec/scrapers/imgur_spec.rb
|
254
|
+
- spec/scrapers/manning_books_spec.rb
|
250
255
|
- spec/scrapers/nasa_apod_spec.rb
|
251
256
|
- spec/scrapers/sinfest_spec.rb
|
252
257
|
- spec/scrapers/xkcd_spec.rb
|
253
258
|
- spec/scrapers_spec.rb
|
254
259
|
- spec/spec_helper.rb
|
255
|
-
has_rdoc:
|