scrapers 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4896b8aea0625256112a05ca839b9b9775a4ff85
4
- data.tar.gz: 994a36cc3a5b97eda32c53838c2f519a008e8f0f
3
+ metadata.gz: 003f7ffd1d5c6ccc6df7470045cf46b28fc5256e
4
+ data.tar.gz: 621f3354ac4af8a765495973ba7e204cde20b570
5
5
  SHA512:
6
- metadata.gz: 94d1e19c786ed926aaaa15c56d72fffcdf0157e25ec60cea8b4dd76313b72cf112da0297f39aefed7ab41a26c18589e96706815de0469795bc749450056171ad
7
- data.tar.gz: 7368f28e2bf91a5063acc6e37a68285a87f3cbbf8de534d66218acde300f5e6fc28b7e7935daf5c4232d6a2056fbb9b087034aa5e324ed0b1e6f3f1ccf9e9de5
6
+ metadata.gz: 8b5c40acbe36d4b90d0f06db30f79acdae8c4ab173c6f572f0a7a0bb96400560038187cdbaf8030ee1bec801d822ac9ec7e0ff4cbc5937839f6853a26173fc68
7
+ data.tar.gz: 35e373c156d79a39f7661453475b5a3cfcfbac855c0d09bdb0a0fa913cae89fc047e29605e98d3189a46672a64b9781e76e972f024e9b2d0bc287dca45b4e5f2
data/Rakefile CHANGED
@@ -2,46 +2,3 @@ require "bundler/gem_tasks"
2
2
  require "highline/import"
3
3
  require "active_support/core_ext/string/inflections"
4
4
 
5
- desc "Create the basis for a new scraper"
6
- task :new, [:module_name] do |t, args|
7
- if args.module_name
8
- module_name = args.module_name
9
- else
10
- module_name = ask("<%= color('What is the name of your new scraper module?', YELLOW) %>") {|q| q.default = "NewScraper" }
11
- end
12
-
13
- file_name = module_name.underscore + ".rb"
14
- dir_name = File.join(File.dirname(__FILE__),'lib','scrapers')
15
-
16
- new_scraper_path = File.join(dir_name, file_name)
17
-
18
- template = <<-EOT
19
- module Scrapers
20
- module #{module_name}
21
-
22
- def self.scrape(url)
23
- results = Hash.new
24
-
25
- Mechanize.start(url) do |m|
26
-
27
- end
28
-
29
- results
30
- end
31
-
32
- end
33
- end
34
- EOT
35
-
36
- if File.exist?(new_scraper_path)
37
- if agree("<%= color('#{file_name}', BLUE); color('already exists. Do you want to overwrite it?', YELLOW) %>", true)
38
- File.unlink(new_scraper_path)
39
- else
40
- exit 0
41
- end
42
- end
43
-
44
- File.write(new_scraper_path, template)
45
- say("<%= color('New scraper in', YELLOW) %> <%= color('#{new_scraper_path}', BLUE) %>")
46
-
47
- end
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'thor'
5
+ require 'netrc'
6
+ require 'scrapers/manning_books'
7
+ require 'awesome_print'
8
+ require 'pry'
9
+
10
+ ################################################################################
11
+ #
12
+ # Scraper for Manning Books I've purchased
13
+ #
14
+ # (Note: you need to log into Manning and have actually purchased e-books.
15
+ # They will appear on your dashboard.)
16
+ #
17
+ ################################################################################
18
+
19
+ class ManningBooks < Thor
20
+
21
+ MANNING = 'manning'
22
+
23
+ desc "download", "Downloads all the editions of all the books on your dashboard"
24
+ method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination to store the downloads", :default => "."
25
+ method_option :user, :aliases => %w{-u -U}, :desc => "Manning user. Default is read from $HOME/.netrc"
26
+ method_option :password, :aliases => %w{-p -pw}, :desc => "Manning password. Default is read from $HOME/.netrc"
27
+
28
+ def download
29
+ netrc = Netrc.read
30
+ user, pw = netrc[MANNING]
31
+ user = options.fetch("user", user)
32
+ pw = options.fetch("password", pw)
33
+ destination = options.fetch("destination", nil)
34
+ STDERR.puts "destination: #{destination}, user: #{user}, pw: #{pw.length}"
35
+ binding.pry
36
+ Signal.trap('INT', proc { STDERR.puts "Download Interrupted"; exit(-1)})
37
+ results = Scrapers::ManningBooks.scrape destination, user, pw
38
+ ap results
39
+ end
40
+
41
+ end
42
+
43
+ ManningBooks.start
@@ -0,0 +1,55 @@
1
+ # -*- ruby -*-
2
+ require 'mechanize'
3
+ require 'pry'
4
+
5
+ module Scrapers
6
+ module ManningBooks
7
+
8
+ DASHBOARD_URL = "https://account.manning.com/dashboard"
9
+
10
+ def self.scrape(dest=".", user=nil, pw=nil)
11
+ results = Array.new
12
+
13
+ Mechanize.start do |m|
14
+ m.get DASHBOARD_URL
15
+ unless m.current_page.uri == DASHBOARD_URL
16
+ # log in
17
+ m.current_page.form.field_with(:type => 'email').value= user
18
+ m.current_page.form.field_with(:type => 'password').value= pw
19
+ m.current_page.form.submit
20
+ sleep 2
21
+ raise "could not log in" unless m.current_page.uri.to_s == DASHBOARD_URL
22
+ end
23
+
24
+ book_downloads = m.current_page.links_with(:href => %r{/account/bookProduct/download})
25
+
26
+ Dir.chdir(dest) do |dir|
27
+ book_downloads.each do |book|
28
+ puts "Downloading #{book.href}"
29
+ m.get book.href
30
+ results << [m.current_page.filename, m.current_page.uri.to_s]
31
+ puts "Saving #{m.current_page.filename}"
32
+ m.current_page.save! # overwrite!
33
+
34
+ wait_a_bit 5
35
+ end
36
+
37
+ end
38
+
39
+ end
40
+
41
+ Hash[results]
42
+ end
43
+
44
+ def self.wait_a_bit(delay)
45
+ puts "delaying for #{delay} second(s)"
46
+ %w[- \ | /].cycle(delay) do |c|
47
+ print "\r#{c}"
48
+ sleep 1
49
+ end
50
+ print "\r"
51
+ end
52
+
53
+
54
+ end
55
+ end
@@ -2,7 +2,7 @@ module Scrapers
2
2
  module Version
3
3
 
4
4
  MAJOR = 1
5
- MINOR = 4
5
+ MINOR = 5
6
6
  BUILD = 0
7
7
 
8
8
  end
@@ -0,0 +1,20 @@
1
+ # -*- ruby -*-
2
+ require 'spec_helper'
3
+ require 'scrapers/manning_books'
4
+
5
+ module Scrapers
6
+
7
+ describe ManningBooks do
8
+ it{should respond_to :scrape}
9
+ context "scraping" do
10
+ before(:all) do
11
+ @comic = VCR.use_cassette('manning_books') do
12
+ @result = Scrapers::ManningBooks.scrape
13
+ end
14
+ end
15
+
16
+ it {expect(@result).to_not be_nil}
17
+
18
+ end
19
+ end
20
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapers
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tamara Temple
@@ -169,6 +169,7 @@ description: A library of web site scrapers utilizing mechanize and other goodie
169
169
  email:
170
170
  - tamouse@gmail.com
171
171
  executables:
172
+ - manning_books
172
173
  - new_scraper
173
174
  - rubytapas
174
175
  extensions: []
@@ -181,6 +182,7 @@ files:
181
182
  - LICENSE.txt
182
183
  - README.md
183
184
  - Rakefile
185
+ - bin/manning_books
184
186
  - bin/new_scraper
185
187
  - bin/rubytapas
186
188
  - lib/scrapers.rb
@@ -190,6 +192,7 @@ files:
190
192
  - lib/scrapers/esod.rb
191
193
  - lib/scrapers/gocomics.rb
192
194
  - lib/scrapers/imgur.rb
195
+ - lib/scrapers/manning_books.rb
193
196
  - lib/scrapers/manning_dashboard.rb
194
197
  - lib/scrapers/nasa_apod.rb
195
198
  - lib/scrapers/rubytapas.rb
@@ -202,6 +205,7 @@ files:
202
205
  - spec/scrapers/download_spec.rb
203
206
  - spec/scrapers/gocomics_spec.rb
204
207
  - spec/scrapers/imgur_spec.rb
208
+ - spec/scrapers/manning_books_spec.rb
205
209
  - spec/scrapers/nasa_apod_spec.rb
206
210
  - spec/scrapers/sinfest_spec.rb
207
211
  - spec/scrapers/xkcd_spec.rb
@@ -247,9 +251,9 @@ test_files:
247
251
  - spec/scrapers/download_spec.rb
248
252
  - spec/scrapers/gocomics_spec.rb
249
253
  - spec/scrapers/imgur_spec.rb
254
+ - spec/scrapers/manning_books_spec.rb
250
255
  - spec/scrapers/nasa_apod_spec.rb
251
256
  - spec/scrapers/sinfest_spec.rb
252
257
  - spec/scrapers/xkcd_spec.rb
253
258
  - spec/scrapers_spec.rb
254
259
  - spec/spec_helper.rb
255
- has_rdoc: