scrapers 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4896b8aea0625256112a05ca839b9b9775a4ff85
4
- data.tar.gz: 994a36cc3a5b97eda32c53838c2f519a008e8f0f
3
+ metadata.gz: 003f7ffd1d5c6ccc6df7470045cf46b28fc5256e
4
+ data.tar.gz: 621f3354ac4af8a765495973ba7e204cde20b570
5
5
  SHA512:
6
- metadata.gz: 94d1e19c786ed926aaaa15c56d72fffcdf0157e25ec60cea8b4dd76313b72cf112da0297f39aefed7ab41a26c18589e96706815de0469795bc749450056171ad
7
- data.tar.gz: 7368f28e2bf91a5063acc6e37a68285a87f3cbbf8de534d66218acde300f5e6fc28b7e7935daf5c4232d6a2056fbb9b087034aa5e324ed0b1e6f3f1ccf9e9de5
6
+ metadata.gz: 8b5c40acbe36d4b90d0f06db30f79acdae8c4ab173c6f572f0a7a0bb96400560038187cdbaf8030ee1bec801d822ac9ec7e0ff4cbc5937839f6853a26173fc68
7
+ data.tar.gz: 35e373c156d79a39f7661453475b5a3cfcfbac855c0d09bdb0a0fa913cae89fc047e29605e98d3189a46672a64b9781e76e972f024e9b2d0bc287dca45b4e5f2
data/Rakefile CHANGED
@@ -2,46 +2,3 @@ require "bundler/gem_tasks"
2
2
  require "highline/import"
3
3
  require "active_support/core_ext/string/inflections"
4
4
 
5
- desc "Create the basis for a new scraper"
6
- task :new, [:module_name] do |t, args|
7
- if args.module_name
8
- module_name = args.module_name
9
- else
10
- module_name = ask("<%= color('What is the name of your new scraper module?', YELLOW) %>") {|q| q.default = "NewScraper" }
11
- end
12
-
13
- file_name = module_name.underscore + ".rb"
14
- dir_name = File.join(File.dirname(__FILE__),'lib','scrapers')
15
-
16
- new_scraper_path = File.join(dir_name, file_name)
17
-
18
- template = <<-EOT
19
- module Scrapers
20
- module #{module_name}
21
-
22
- def self.scrape(url)
23
- results = Hash.new
24
-
25
- Mechanize.start(url) do |m|
26
-
27
- end
28
-
29
- results
30
- end
31
-
32
- end
33
- end
34
- EOT
35
-
36
- if File.exist?(new_scraper_path)
37
- if agree("<%= color('#{file_name}', BLUE); color('already exists. Do you want to overwrite it?', YELLOW) %>", true)
38
- File.unlink(new_scraper_path)
39
- else
40
- exit 0
41
- end
42
- end
43
-
44
- File.write(new_scraper_path, template)
45
- say("<%= color('New scraper in', YELLOW) %> <%= color('#{new_scraper_path}', BLUE) %>")
46
-
47
- end
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'thor'
5
+ require 'netrc'
6
+ require 'scrapers/manning_books'
7
+ require 'awesome_print'
8
+ require 'pry'
9
+
10
+ ################################################################################
11
+ #
12
+ # Scraper for Manning Books I've purchased
13
+ #
14
+ # (Note: you need to log into Manning and have actually purchased e-books.
15
+ # They will appear on your dashboard.)
16
+ #
17
+ ################################################################################
18
+
19
+ class ManningBooks < Thor
20
+
21
+ MANNING = 'manning'
22
+
23
+ desc "download", "Downloads all the editions of all the books on your dashboard"
24
+ method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination to store the downloads", :default => "."
25
+ method_option :user, :aliases => %w{-u -U}, :desc => "Manning user. Default is read from $HOME/.netrc"
26
+ method_option :password, :aliases => %w{-p -pw}, :desc => "Manning password. Default is read from $HOME/.netrc"
27
+
28
+ def download
29
+ netrc = Netrc.read
30
+ user, pw = netrc[MANNING]
31
+ user = options.fetch("user", user)
32
+ pw = options.fetch("password", pw)
33
+ destination = options.fetch("destination", nil)
34
+ STDERR.puts "destination: #{destination}, user: #{user}, pw: #{pw.length}"
35
+ binding.pry
36
+ Signal.trap('INT', proc { STDERR.puts "Download Interrupted"; exit(-1)})
37
+ results = Scrapers::ManningBooks.scrape destination, user, pw
38
+ ap results
39
+ end
40
+
41
+ end
42
+
43
+ ManningBooks.start
@@ -0,0 +1,55 @@
1
+ # -*- ruby -*-
2
+ require 'mechanize'
3
+ require 'pry'
4
+
5
+ module Scrapers
6
+ module ManningBooks
7
+
8
+ DASHBOARD_URL = "https://account.manning.com/dashboard"
9
+
10
+ def self.scrape(dest=".", user=nil, pw=nil)
11
+ results = Array.new
12
+
13
+ Mechanize.start do |m|
14
+ m.get DASHBOARD_URL
15
+ unless m.current_page.uri == DASHBOARD_URL
16
+ # log in
17
+ m.current_page.form.field_with(:type => 'email').value= user
18
+ m.current_page.form.field_with(:type => 'password').value= pw
19
+ m.current_page.form.submit
20
+ sleep 2
21
+ raise "could not log in" unless m.current_page.uri.to_s == DASHBOARD_URL
22
+ end
23
+
24
+ book_downloads = m.current_page.links_with(:href => %r{/account/bookProduct/download})
25
+
26
+ Dir.chdir(dest) do |dir|
27
+ book_downloads.each do |book|
28
+ puts "Downloading #{book.href}"
29
+ m.get book.href
30
+ results << [m.current_page.filename, m.current_page.uri.to_s]
31
+ puts "Saving #{m.current_page.filename}"
32
+ m.current_page.save! # overwrite!
33
+
34
+ wait_a_bit 5
35
+ end
36
+
37
+ end
38
+
39
+ end
40
+
41
+ Hash[results]
42
+ end
43
+
44
+ def self.wait_a_bit(delay)
45
+ puts "delaying for #{delay} second(s)"
46
+ %w[- \ | /].cycle(delay) do |c|
47
+ print "\r#{c}"
48
+ sleep 1
49
+ end
50
+ print "\r"
51
+ end
52
+
53
+
54
+ end
55
+ end
@@ -2,7 +2,7 @@ module Scrapers
2
2
  module Version
3
3
 
4
4
  MAJOR = 1
5
- MINOR = 4
5
+ MINOR = 5
6
6
  BUILD = 0
7
7
 
8
8
  end
@@ -0,0 +1,20 @@
1
+ # -*- ruby -*-
2
+ require 'spec_helper'
3
+ require 'scrapers/manning_books'
4
+
5
+ module Scrapers
6
+
7
+ describe ManningBooks do
8
+ it{should respond_to :scrape}
9
+ context "scraping" do
10
+ before(:all) do
11
+ @comic = VCR.use_cassette('manning_books') do
12
+ @result = Scrapers::ManningBooks.scrape
13
+ end
14
+ end
15
+
16
+ it {expect(@result).to_not be_nil}
17
+
18
+ end
19
+ end
20
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapers
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tamara Temple
@@ -169,6 +169,7 @@ description: A library of web site scrapers utilizing mechanize and other goodie
169
169
  email:
170
170
  - tamouse@gmail.com
171
171
  executables:
172
+ - manning_books
172
173
  - new_scraper
173
174
  - rubytapas
174
175
  extensions: []
@@ -181,6 +182,7 @@ files:
181
182
  - LICENSE.txt
182
183
  - README.md
183
184
  - Rakefile
185
+ - bin/manning_books
184
186
  - bin/new_scraper
185
187
  - bin/rubytapas
186
188
  - lib/scrapers.rb
@@ -190,6 +192,7 @@ files:
190
192
  - lib/scrapers/esod.rb
191
193
  - lib/scrapers/gocomics.rb
192
194
  - lib/scrapers/imgur.rb
195
+ - lib/scrapers/manning_books.rb
193
196
  - lib/scrapers/manning_dashboard.rb
194
197
  - lib/scrapers/nasa_apod.rb
195
198
  - lib/scrapers/rubytapas.rb
@@ -202,6 +205,7 @@ files:
202
205
  - spec/scrapers/download_spec.rb
203
206
  - spec/scrapers/gocomics_spec.rb
204
207
  - spec/scrapers/imgur_spec.rb
208
+ - spec/scrapers/manning_books_spec.rb
205
209
  - spec/scrapers/nasa_apod_spec.rb
206
210
  - spec/scrapers/sinfest_spec.rb
207
211
  - spec/scrapers/xkcd_spec.rb
@@ -247,9 +251,9 @@ test_files:
247
251
  - spec/scrapers/download_spec.rb
248
252
  - spec/scrapers/gocomics_spec.rb
249
253
  - spec/scrapers/imgur_spec.rb
254
+ - spec/scrapers/manning_books_spec.rb
250
255
  - spec/scrapers/nasa_apod_spec.rb
251
256
  - spec/scrapers/sinfest_spec.rb
252
257
  - spec/scrapers/xkcd_spec.rb
253
258
  - spec/scrapers_spec.rb
254
259
  - spec/spec_helper.rb
255
- has_rdoc: