scrapers 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +0 -43
- data/bin/manning_books +43 -0
- data/lib/scrapers/manning_books.rb +55 -0
- data/lib/scrapers/version.rb +1 -1
- data/spec/scrapers/manning_books_spec.rb +20 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 003f7ffd1d5c6ccc6df7470045cf46b28fc5256e
|
4
|
+
data.tar.gz: 621f3354ac4af8a765495973ba7e204cde20b570
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b5c40acbe36d4b90d0f06db30f79acdae8c4ab173c6f572f0a7a0bb96400560038187cdbaf8030ee1bec801d822ac9ec7e0ff4cbc5937839f6853a26173fc68
|
7
|
+
data.tar.gz: 35e373c156d79a39f7661453475b5a3cfcfbac855c0d09bdb0a0fa913cae89fc047e29605e98d3189a46672a64b9781e76e972f024e9b2d0bc287dca45b4e5f2
|
data/Rakefile
CHANGED
@@ -2,46 +2,3 @@ require "bundler/gem_tasks"
|
|
2
2
|
require "highline/import"
|
3
3
|
require "active_support/core_ext/string/inflections"
|
4
4
|
|
5
|
-
desc "Create the basis for a new scraper"
|
6
|
-
task :new, [:module_name] do |t, args|
|
7
|
-
if args.module_name
|
8
|
-
module_name = args.module_name
|
9
|
-
else
|
10
|
-
module_name = ask("<%= color('What is the name of your new scraper module?', YELLOW) %>") {|q| q.default = "NewScraper" }
|
11
|
-
end
|
12
|
-
|
13
|
-
file_name = module_name.underscore + ".rb"
|
14
|
-
dir_name = File.join(File.dirname(__FILE__),'lib','scrapers')
|
15
|
-
|
16
|
-
new_scraper_path = File.join(dir_name, file_name)
|
17
|
-
|
18
|
-
template = <<-EOT
|
19
|
-
module Scrapers
|
20
|
-
module #{module_name}
|
21
|
-
|
22
|
-
def self.scrape(url)
|
23
|
-
results = Hash.new
|
24
|
-
|
25
|
-
Mechanize.start(url) do |m|
|
26
|
-
|
27
|
-
end
|
28
|
-
|
29
|
-
results
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
33
|
-
end
|
34
|
-
EOT
|
35
|
-
|
36
|
-
if File.exist?(new_scraper_path)
|
37
|
-
if agree("<%= color('#{file_name}', BLUE); color('already exists. Do you want to overwrite it?', YELLOW) %>", true)
|
38
|
-
File.unlink(new_scraper_path)
|
39
|
-
else
|
40
|
-
exit 0
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
File.write(new_scraper_path, template)
|
45
|
-
say("<%= color('New scraper in', YELLOW) %> <%= color('#{new_scraper_path}', BLUE) %>")
|
46
|
-
|
47
|
-
end
|
data/bin/manning_books
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'thor'
|
5
|
+
require 'netrc'
|
6
|
+
require 'scrapers/manning_books'
|
7
|
+
require 'awesome_print'
|
8
|
+
require 'pry'
|
9
|
+
|
10
|
+
################################################################################
|
11
|
+
#
|
12
|
+
# Scraper for Manning Books I've purchased
|
13
|
+
#
|
14
|
+
# (Note: you need to log into Manning and have actually purchased e-books.
|
15
|
+
# They will appear on your dashboard.)
|
16
|
+
#
|
17
|
+
################################################################################
|
18
|
+
|
19
|
+
class ManningBooks < Thor
|
20
|
+
|
21
|
+
MANNING = 'manning'
|
22
|
+
|
23
|
+
desc "download", "Downloads all the editions of all the books on your dashboard"
|
24
|
+
method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination to store the downloads", :default => "."
|
25
|
+
method_option :user, :aliases => %w{-u -U}, :desc => "Manning user. Default is read from $HOME/.netrc"
|
26
|
+
method_option :password, :aliases => %w{-p -pw}, :desc => "Manning password. Default is read from $HOME/.netrc"
|
27
|
+
|
28
|
+
def download
|
29
|
+
netrc = Netrc.read
|
30
|
+
user, pw = netrc[MANNING]
|
31
|
+
user = options.fetch("user", user)
|
32
|
+
pw = options.fetch("password", pw)
|
33
|
+
destination = options.fetch("destination", nil)
|
34
|
+
STDERR.puts "destination: #{destination}, user: #{user}, pw: #{pw.length}"
|
35
|
+
binding.pry
|
36
|
+
Signal.trap('INT', proc { STDERR.puts "Download Interrupted"; exit(-1)})
|
37
|
+
results = Scrapers::ManningBooks.scrape destination, user, pw
|
38
|
+
ap results
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
ManningBooks.start
|
data/lib/scrapers/manning_books.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
require 'mechanize'
|
3
|
+
require 'pry'
|
4
|
+
|
5
|
+
module Scrapers
|
6
|
+
module ManningBooks
|
7
|
+
|
8
|
+
DASHBOARD_URL = "https://account.manning.com/dashboard"
|
9
|
+
|
10
|
+
def self.scrape(dest=".", user=nil, pw=nil)
|
11
|
+
results = Array.new
|
12
|
+
|
13
|
+
Mechanize.start do |m|
|
14
|
+
m.get DASHBOARD_URL
|
15
|
+
unless m.current_page.uri == DASHBOARD_URL
|
16
|
+
# log in
|
17
|
+
m.current_page.form.field_with(:type => 'email').value= user
|
18
|
+
m.current_page.form.field_with(:type => 'password').value= pw
|
19
|
+
m.current_page.form.submit
|
20
|
+
sleep 2
|
21
|
+
raise "could not log in" unless m.current_page.uri.to_s == DASHBOARD_URL
|
22
|
+
end
|
23
|
+
|
24
|
+
book_downloads = m.current_page.links_with(:href => %r{/account/bookProduct/download})
|
25
|
+
|
26
|
+
Dir.chdir(dest) do |dir|
|
27
|
+
book_downloads.each do |book|
|
28
|
+
puts "Downloading #{book.href}"
|
29
|
+
m.get book.href
|
30
|
+
results << [m.current_page.filename, m.current_page.uri.to_s]
|
31
|
+
puts "Saving #{m.current_page.filename}"
|
32
|
+
m.current_page.save! # overwrite!
|
33
|
+
|
34
|
+
wait_a_bit 5
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
Hash[results]
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.wait_a_bit(delay)
|
45
|
+
puts "delaying for #{delay} second(s)"
|
46
|
+
%w[- \ | /].cycle(delay) do |c|
|
47
|
+
print "\r#{c}"
|
48
|
+
sleep 1
|
49
|
+
end
|
50
|
+
print "\r"
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
data/lib/scrapers/version.rb
CHANGED
data/spec/scrapers/manning_books_spec.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'scrapers/manning_books'
|
4
|
+
|
5
|
+
module Scrapers
|
6
|
+
|
7
|
+
describe ManningBooks do
|
8
|
+
it{should respond_to :scrape}
|
9
|
+
context "scraping" do
|
10
|
+
before(:all) do
|
11
|
+
@comic = VCR.use_cassette('manning_books') do
|
12
|
+
@result = Scrapers::ManningBooks.scrape
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
it {expect(@result).to_not be_nil}
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tamara Temple
|
@@ -169,6 +169,7 @@ description: A library of web site scrapers utilizing mechanize and other goodie
|
|
169
169
|
email:
|
170
170
|
- tamouse@gmail.com
|
171
171
|
executables:
|
172
|
+
- manning_books
|
172
173
|
- new_scraper
|
173
174
|
- rubytapas
|
174
175
|
extensions: []
|
@@ -181,6 +182,7 @@ files:
|
|
181
182
|
- LICENSE.txt
|
182
183
|
- README.md
|
183
184
|
- Rakefile
|
185
|
+
- bin/manning_books
|
184
186
|
- bin/new_scraper
|
185
187
|
- bin/rubytapas
|
186
188
|
- lib/scrapers.rb
|
@@ -190,6 +192,7 @@ files:
|
|
190
192
|
- lib/scrapers/esod.rb
|
191
193
|
- lib/scrapers/gocomics.rb
|
192
194
|
- lib/scrapers/imgur.rb
|
195
|
+
- lib/scrapers/manning_books.rb
|
193
196
|
- lib/scrapers/manning_dashboard.rb
|
194
197
|
- lib/scrapers/nasa_apod.rb
|
195
198
|
- lib/scrapers/rubytapas.rb
|
@@ -202,6 +205,7 @@ files:
|
|
202
205
|
- spec/scrapers/download_spec.rb
|
203
206
|
- spec/scrapers/gocomics_spec.rb
|
204
207
|
- spec/scrapers/imgur_spec.rb
|
208
|
+
- spec/scrapers/manning_books_spec.rb
|
205
209
|
- spec/scrapers/nasa_apod_spec.rb
|
206
210
|
- spec/scrapers/sinfest_spec.rb
|
207
211
|
- spec/scrapers/xkcd_spec.rb
|
@@ -247,9 +251,9 @@ test_files:
|
|
247
251
|
- spec/scrapers/download_spec.rb
|
248
252
|
- spec/scrapers/gocomics_spec.rb
|
249
253
|
- spec/scrapers/imgur_spec.rb
|
254
|
+
- spec/scrapers/manning_books_spec.rb
|
250
255
|
- spec/scrapers/nasa_apod_spec.rb
|
251
256
|
- spec/scrapers/sinfest_spec.rb
|
252
257
|
- spec/scrapers/xkcd_spec.rb
|
253
258
|
- spec/scrapers_spec.rb
|
254
259
|
- spec/spec_helper.rb
|
255
|
-
has_rdoc:
|