ar_book_finder 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +19 -6
- data/lib/ar_book_finder.rb +6 -4
- data/lib/ar_book_finder/book.rb +8 -3
- data/lib/ar_book_finder/collection_processor.rb +30 -0
- data/lib/ar_book_finder/constants.rb +3 -0
- data/lib/ar_book_finder/pagination_processor.rb +10 -16
- data/lib/ar_book_finder/quick_search_processor.rb +6 -6
- data/lib/ar_book_finder/scraper.rb +23 -5
- data/lib/ar_book_finder/search_results_parser.rb +18 -8
- data/lib/ar_book_finder/user_type_processor.rb +3 -1
- data/lib/ar_book_finder/version.rb +1 -1
- data/spec/ar_book_finder/collection_spec.rb +25 -0
- data/spec/ar_book_finder/search_spec.rb +20 -0
- metadata +8 -6
- data/spec/ar_book_finder/book_spec.rb +0 -10
- data/spec/ar_book_finder_spec.rb +0 -15
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Retrieve book data from [arbookfind.com](http://www.arbookfind.com)
|
4
4
|
|
5
|
-
[![Build Status](https://travis-ci.org/anthonator/ar-book-finder.png?branch=master)](https://travis-ci.org/anthonator/ar-book-finder) [![Dependency Status](https://gemnasium.com/anthonator/ar-book-finder.png)](https://gemnasium.com/anthonator/ar-book-finder) [![Coverage Status](https://coveralls.io/repos/anthonator/ar-book-finder/badge.png)](https://coveralls.io/r/anthonator/ar-book-finder) [![Code Climate](https://codeclimate.com/github/anthonator/ar-book-finder.png)](https://codeclimate.com/github/anthonator/ar-book-finder)
|
5
|
+
[![Gem Version](https://badge.fury.io/rb/ar_book_finder.png)](http://badge.fury.io/rb/ar_book_finder) [![Build Status](https://travis-ci.org/anthonator/ar-book-finder.png?branch=master)](https://travis-ci.org/anthonator/ar-book-finder) [![Dependency Status](https://gemnasium.com/anthonator/ar-book-finder.png)](https://gemnasium.com/anthonator/ar-book-finder) [![Coverage Status](https://coveralls.io/repos/anthonator/ar-book-finder/badge.png)](https://coveralls.io/r/anthonator/ar-book-finder) [![Code Climate](https://codeclimate.com/github/anthonator/ar-book-finder.png)](https://codeclimate.com/github/anthonator/ar-book-finder)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -68,7 +68,7 @@ And publisher details:
|
|
68
68
|
## Usage
|
69
69
|
|
70
70
|
### Quick Search
|
71
|
-
Quick search
|
71
|
+
Quick search will allow you to perform searches based on title, topics, author or ISBN.
|
72
72
|
|
73
73
|
Perform a quick search...
|
74
74
|
```ruby
|
@@ -79,7 +79,7 @@ results.books # Retrieve the books returned on this page
|
|
79
79
|
Load book data on demand using ```#fetch```...
|
80
80
|
``` ruby
|
81
81
|
...
|
82
|
-
book = results[0]
|
82
|
+
book = results.books[0]
|
83
83
|
book.fetch # Retrieve book data
|
84
84
|
book.title
|
85
85
|
book.author
|
@@ -94,11 +94,24 @@ publisher.isbn
|
|
94
94
|
...
|
95
95
|
```
|
96
96
|
|
97
|
-
###
|
98
|
-
|
97
|
+
### Collections
|
98
|
+
Collections retrieve lists of books. Examples include awards, state lists, etc.
|
99
|
+
|
100
|
+
Retrieve a collection...
|
101
|
+
```ruby
|
102
|
+
results = ARBookFinder.collection({ 'Awards' => 'ALA Notable/Best Books' })
|
103
|
+
```
|
104
|
+
|
105
|
+
Retrieve a collection with a multi-level hash...
|
106
|
+
```ruby
|
107
|
+
results = ARBookFinder.collection({ 'State Lists' => { 'Indiana' => 'IN Young Hoosier Middle Grades Book Award Nominees 2013-2014' } })
|
108
|
+
```
|
109
|
+
|
110
|
+
### Pagination
|
111
|
+
It's also possible to paginate search results.
|
99
112
|
```ruby
|
100
113
|
# Retrieve results for page 2
|
101
|
-
results = ARBookFinder.
|
114
|
+
results = ARBookFinder.search('harry potter', 2)
|
102
115
|
```
|
103
116
|
|
104
117
|
## Contributing
|
data/lib/ar_book_finder.rb
CHANGED
@@ -2,9 +2,11 @@ require 'capybara'
|
|
2
2
|
require 'capybara/poltergeist'
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
|
+
require 'ar_book_finder/constants'
|
5
6
|
require 'ar_book_finder/user_type_processor'
|
6
7
|
require 'ar_book_finder/pagination_processor'
|
7
8
|
require 'ar_book_finder/quick_search_processor'
|
9
|
+
require 'ar_book_finder/collection_processor'
|
8
10
|
require 'ar_book_finder/book_detail_processor'
|
9
11
|
require 'ar_book_finder/search_results_parser'
|
10
12
|
require 'ar_book_finder/book_detail_parser'
|
@@ -24,15 +26,15 @@ module ARBookFinder
|
|
24
26
|
Scraper.new(user_type)
|
25
27
|
end
|
26
28
|
|
27
|
-
def self.search(query, page = 1
|
28
|
-
scraper(options[:user_type]).search(query, page
|
29
|
+
def self.search(query, page = 1)
|
30
|
+
scraper(options[:user_type]).search(query, page)
|
29
31
|
end
|
30
32
|
|
31
33
|
def self.advanced_search(user_type, search_type, params)
|
32
34
|
raise 'Not yet implemented'
|
33
35
|
end
|
34
36
|
|
35
|
-
def self.collection(
|
36
|
-
|
37
|
+
def self.collection(collection, page = 1)
|
38
|
+
scraper(options[:user_type]).collection(collection, page)
|
37
39
|
end
|
38
40
|
end
|
data/lib/ar_book_finder/book.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
module ARBookFinder
|
2
4
|
class Book
|
3
5
|
attr_reader :cover, :title, :author, :summary,
|
@@ -29,12 +31,11 @@ module ARBookFinder
|
|
29
31
|
load_ar_quiz_availability(parsed_data[:ar_quiz_availability])
|
30
32
|
load_topics(parsed_data[:topics])
|
31
33
|
load_series(parsed_data[:series])
|
32
|
-
|
33
|
-
parsed_data[:publishers].each { |p| @publishers << Publisher.new(p) }
|
34
|
+
load_publishers(parsed_data[:publishers])
|
34
35
|
end
|
35
36
|
|
36
37
|
def load_ar_quiz_availability(ar_quiz_availability)
|
37
|
-
@ar_quiz_availability = ar_quiz_availability.split(',').collect { |v| v.strip }
|
38
|
+
@ar_quiz_availability = ar_quiz_availability.split(',').collect { |v| v.strip.gsub(' ', '') }
|
38
39
|
end
|
39
40
|
|
40
41
|
def load_topics(topics)
|
@@ -46,5 +47,9 @@ module ARBookFinder
|
|
46
47
|
def load_series(series)
|
47
48
|
@series = series.split(';').collect { |v| v.strip }
|
48
49
|
end
|
50
|
+
|
51
|
+
def load_publishers(publishers)
|
52
|
+
publishers.each { |p| @publishers << Publisher.new(p) }
|
53
|
+
end
|
49
54
|
end
|
50
55
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module ARBookFinder
|
2
|
+
class CollectionProcessor
|
3
|
+
include Capybara::DSL
|
4
|
+
|
5
|
+
COLLECTIONS_URL = "#{ARBookFinder::BASE_URL}/collections.aspx"
|
6
|
+
|
7
|
+
def initialize(collections)
|
8
|
+
@collections = collections
|
9
|
+
end
|
10
|
+
|
11
|
+
def process
|
12
|
+
unless current_url.downcase == COLLECTIONS_URL
|
13
|
+
visit(COLLECTIONS_URL)
|
14
|
+
end
|
15
|
+
navigate_collection(@collections)
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
def navigate_collection(collection)
|
20
|
+
collection.each do |key, value|
|
21
|
+
click_on(key)
|
22
|
+
if value.kind_of?(Hash)
|
23
|
+
navigate_collection(value)
|
24
|
+
else
|
25
|
+
click_on(value)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -1,27 +1,21 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class PaginationProcessor
|
3
3
|
include Capybara::DSL
|
4
|
+
|
5
|
+
SEARCH_GOTO_FIELD = 'ctl00_ContentPlaceHolder1_ucSeachResults_txtPageToGoToTop'
|
6
|
+
SEARCH_SUBMIT_BUTTON = 'ctl00_ContentPlaceHolder1_ucSeachResults_btnGoToPageTop'
|
7
|
+
|
8
|
+
COLLECTION_GOTO_FIELD = 'ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_txtPageToGoToTop'
|
9
|
+
COLLECTION_SUBMIT_BUTTON = 'ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_btnGoToPageTop'
|
4
10
|
|
5
|
-
|
6
|
-
|
7
|
-
SORT_BY_TYPES = {
|
8
|
-
title: 'Title',
|
9
|
-
author: 'Author',
|
10
|
-
interest_level: 'Interest Level',
|
11
|
-
book_level: 'Book Level',
|
12
|
-
relevance: 'Relevance',
|
13
|
-
rating: 'Rating'
|
14
|
-
}
|
15
|
-
|
16
|
-
def initialize(page, sort_by)
|
11
|
+
def initialize(page, collection = false)
|
17
12
|
@page = page
|
18
|
-
@
|
13
|
+
@field_const = collection ? :COLLECTION : :SEARCH
|
19
14
|
end
|
20
15
|
|
21
16
|
def process
|
22
|
-
fill_in(
|
23
|
-
|
24
|
-
click_button('ctl00_ContentPlaceHolder1_ucSeachResults_btnGoToPageTop')
|
17
|
+
fill_in(self.class.const_get(:"#{@field_const}_GOTO_FIELD"), with: @page)
|
18
|
+
click_button(self.class.const_get(:"#{@field_const}_SUBMIT_BUTTON"))
|
25
19
|
end
|
26
20
|
end
|
27
21
|
end
|
@@ -1,19 +1,19 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class QuickSearchProcessor
|
3
3
|
include Capybara::DSL
|
4
|
+
|
5
|
+
QUICK_SEARCH_URL = "#{ARBookFinder::BASE_URL}/default.aspx"
|
4
6
|
|
5
|
-
def initialize(query
|
7
|
+
def initialize(query)
|
6
8
|
@query = query
|
7
|
-
@page = page
|
8
|
-
@sort_by = sort_by
|
9
9
|
end
|
10
10
|
|
11
11
|
def process
|
12
|
+
unless current_url.downcase == QUICK_SEARCH_URL
|
13
|
+
visit(QUICK_SEARCH_URL)
|
14
|
+
end
|
12
15
|
fill_in('ctl00_ContentPlaceHolder1_txtKeyWords', with: @query)
|
13
16
|
click_button('ctl00_ContentPlaceHolder1_btnDoIt')
|
14
|
-
if @page > 1 || @sort_by != PaginationProcessor::DEFAULT_SORT_BY
|
15
|
-
PaginationProcessor.new(@page, @sort_by).process
|
16
|
-
end
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
@@ -1,13 +1,31 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class Scraper
|
3
3
|
def initialize(user_type)
|
4
|
-
|
4
|
+
UserTypeProcessor.new(user_type).process
|
5
5
|
end
|
6
6
|
|
7
|
-
def search(query, page = 1
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
def search(query, page = 1)
|
8
|
+
QuickSearchProcessor.new(query).process
|
9
|
+
results = SearchResultsParser.new(Capybara.page.html).parse
|
10
|
+
if page > 1
|
11
|
+
results = paginate(page, false)
|
12
|
+
end
|
13
|
+
results
|
14
|
+
end
|
15
|
+
|
16
|
+
def collection(collection, page = 1)
|
17
|
+
CollectionProcessor.new(collection).process
|
18
|
+
results = SearchResultsParser.new(Capybara.page.html, true).parse
|
19
|
+
if page > 1
|
20
|
+
results = paginate(page, true)
|
21
|
+
end
|
22
|
+
results
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
def paginate(page, collection)
|
27
|
+
PaginationProcessor.new(page, collection).process
|
28
|
+
SearchResultsParser.new(Capybara.page.html, collection).parse
|
11
29
|
end
|
12
30
|
end
|
13
31
|
end
|
@@ -1,36 +1,46 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class SearchResultsParser
|
3
|
-
|
4
|
-
|
3
|
+
SEARCH_PAGE_COUNT_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucSeachResults_lblResultsSummaryTop"]'
|
4
|
+
SEARCH_RESULTS_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucSeachResults_lblQuizzes"]/table'
|
5
|
+
|
6
|
+
COLLECTION_PAGE_COUNT_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_lblResultsSummaryTop"]'
|
7
|
+
COLLECTION_RESULTS_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_lblQuizzes"]/table'
|
8
|
+
|
5
9
|
BOOK_XPATH = 'tbody/tr/td[2]'
|
6
10
|
BOOK_DETAIL_XPATH = 'table/tbody/tr/td[2]'
|
7
11
|
BOOK_URL_XPATH = 'a'
|
8
12
|
|
9
|
-
attr_reader :page_count, :books
|
13
|
+
attr_reader :current_page, :page_count, :total_books, :books
|
10
14
|
|
11
|
-
def initialize(html)
|
15
|
+
def initialize(html, collection = false)
|
12
16
|
@doc = Nokogiri::HTML.parse(html)
|
17
|
+
@xpath_const = collection ? :COLLECTION : :SEARCH
|
13
18
|
@books = []
|
14
19
|
end
|
15
20
|
|
16
21
|
def parse
|
17
|
-
@
|
22
|
+
@current_page = parse_current_page.to_i
|
23
|
+
@page_count = parse_page_count.to_i
|
18
24
|
@books = parse_results
|
19
25
|
self
|
20
26
|
end
|
21
27
|
|
22
28
|
private
|
29
|
+
def parse_current_page
|
30
|
+
@doc.xpath(self.class.const_get(:"#{@xpath_const}_PAGE_COUNT_XPATH")).text.gsub(/Page /, '').gsub(/ of \d+/, '')
|
31
|
+
end
|
32
|
+
|
23
33
|
def parse_page_count
|
24
|
-
@doc.xpath(
|
34
|
+
@doc.xpath(self.class.const_get(:"#{@xpath_const}_PAGE_COUNT_XPATH")).text.gsub(/Page \d+ of /, '')
|
25
35
|
end
|
26
36
|
|
27
37
|
def parse_results
|
28
38
|
books = []
|
29
|
-
@doc.xpath(
|
39
|
+
@doc.xpath(self.class.const_get(:"#{@xpath_const}_RESULTS_XPATH")).each_with_index do |result, i|
|
30
40
|
next if i.odd?
|
31
41
|
book = result.xpath(BOOK_XPATH)
|
32
42
|
book_detail = book.xpath(BOOK_DETAIL_XPATH)
|
33
|
-
books << Book.new(
|
43
|
+
books << Book.new("#{ARBookFinder::BASE_URL}/#{book_detail.xpath(BOOK_URL_XPATH).attribute('href').content}")
|
34
44
|
end
|
35
45
|
books
|
36
46
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class UserTypeProcessor
|
3
3
|
include Capybara::DSL
|
4
|
+
|
5
|
+
USER_TYPE_URL = "#{ARBookFinder::BASE_URL}/usertype.aspx"
|
4
6
|
|
5
7
|
USER_TYPES = {
|
6
8
|
student: 'Student',
|
@@ -14,7 +16,7 @@ module ARBookFinder
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def process
|
17
|
-
visit(
|
19
|
+
visit(USER_TYPE_URL)
|
18
20
|
choose(@user_type)
|
19
21
|
click_button('Submit')
|
20
22
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Collection' do
|
4
|
+
it 'should return results for a single child hash' do
|
5
|
+
results = ARBookFinder.collection('Awards' => 'ALA Notable/Best Books')
|
6
|
+
results.books.size.should > 0
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'should return results for a multi child hash' do
|
10
|
+
results = ARBookFinder.collection('State Lists' => { 'Indiana' => 'IN Young Hoosier Middle Grades Book Award Nominees 2013-2014' })
|
11
|
+
results.books.size.should > 0
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should return results for page 2' do
|
15
|
+
results = ARBookFinder.collection({ 'Awards' => 'ALA Notable/Best Books' }, 2)
|
16
|
+
results.current_page.should == 2
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'should fetch book data' do
|
20
|
+
results = ARBookFinder.collection('Awards' => 'ALA Notable/Best Books')
|
21
|
+
book = results.books[0]
|
22
|
+
book.fetch
|
23
|
+
book.title.should_not be ''
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Search' do
|
4
|
+
it 'should return results for a title search' do
|
5
|
+
results = ARBookFinder.search('harry potter')
|
6
|
+
results.books.size.should > 0
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'should return results for page 2' do
|
10
|
+
results = ARBookFinder.search('harry potter', 2)
|
11
|
+
results.current_page.should == 2
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should fetch book data' do
|
15
|
+
results = ARBookFinder.search('harry potter')
|
16
|
+
book = results.books[0]
|
17
|
+
book.fetch
|
18
|
+
book.title.should_not be ''
|
19
|
+
end
|
20
|
+
end
|
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: ar_book_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.0
|
5
|
+
version: 1.1.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Anthony Smith
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-10-
|
12
|
+
date: 2013-10-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: capybara
|
@@ -109,7 +109,9 @@ files:
|
|
109
109
|
- lib/ar_book_finder/book.rb
|
110
110
|
- lib/ar_book_finder/book_detail_parser.rb
|
111
111
|
- lib/ar_book_finder/book_detail_processor.rb
|
112
|
+
- lib/ar_book_finder/collection_processor.rb
|
112
113
|
- lib/ar_book_finder/configuration.rb
|
114
|
+
- lib/ar_book_finder/constants.rb
|
113
115
|
- lib/ar_book_finder/pagination_processor.rb
|
114
116
|
- lib/ar_book_finder/publisher.rb
|
115
117
|
- lib/ar_book_finder/quick_search_processor.rb
|
@@ -118,8 +120,8 @@ files:
|
|
118
120
|
- lib/ar_book_finder/user_type_processor.rb
|
119
121
|
- lib/ar_book_finder/version.rb
|
120
122
|
- spec/.keep
|
121
|
-
- spec/ar_book_finder/book_spec.rb
|
122
|
-
- spec/ar_book_finder_spec.rb
|
123
|
+
- spec/ar_book_finder/collection_spec.rb
|
124
|
+
- spec/ar_book_finder/search_spec.rb
|
123
125
|
- spec/spec_helper.rb
|
124
126
|
homepage: https://github.com/anthonator/ar-book-finder
|
125
127
|
licenses:
|
@@ -148,6 +150,6 @@ specification_version: 3
|
|
148
150
|
summary: Crawls and parses data on arbookfind.com and returns book data in an easy to use format
|
149
151
|
test_files:
|
150
152
|
- spec/.keep
|
151
|
-
- spec/ar_book_finder/book_spec.rb
|
152
|
-
- spec/ar_book_finder_spec.rb
|
153
|
+
- spec/ar_book_finder/collection_spec.rb
|
154
|
+
- spec/ar_book_finder/search_spec.rb
|
153
155
|
- spec/spec_helper.rb
|
data/spec/ar_book_finder_spec.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe ARBookFinder do
|
4
|
-
describe 'search' do
|
5
|
-
it 'should return results' do
|
6
|
-
results = ARBookFinder.search('978-0590353403').books
|
7
|
-
results.size.should > 0
|
8
|
-
end
|
9
|
-
|
10
|
-
it 'should paginate and sort results' do
|
11
|
-
results = ARBookFinder.search('harry potter', 2, :title).books
|
12
|
-
results.size.should > 0
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|