ar_book_finder 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +19 -6
- data/lib/ar_book_finder.rb +6 -4
- data/lib/ar_book_finder/book.rb +8 -3
- data/lib/ar_book_finder/collection_processor.rb +30 -0
- data/lib/ar_book_finder/constants.rb +3 -0
- data/lib/ar_book_finder/pagination_processor.rb +10 -16
- data/lib/ar_book_finder/quick_search_processor.rb +6 -6
- data/lib/ar_book_finder/scraper.rb +23 -5
- data/lib/ar_book_finder/search_results_parser.rb +18 -8
- data/lib/ar_book_finder/user_type_processor.rb +3 -1
- data/lib/ar_book_finder/version.rb +1 -1
- data/spec/ar_book_finder/collection_spec.rb +25 -0
- data/spec/ar_book_finder/search_spec.rb +20 -0
- metadata +8 -6
- data/spec/ar_book_finder/book_spec.rb +0 -10
- data/spec/ar_book_finder_spec.rb +0 -15
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Retrieve book data from [arbookfind.com](http://www.arbookfind.com)
|
4
4
|
|
5
|
-
[](https://travis-ci.org/anthonator/ar-book-finder) [](https://gemnasium.com/anthonator/ar-book-finder) [](https://coveralls.io/r/anthonator/ar-book-finder) [](https://codeclimate.com/github/anthonator/ar-book-finder)
|
5
|
+
[](http://badge.fury.io/rb/ar_book_finder) [](https://travis-ci.org/anthonator/ar-book-finder) [](https://gemnasium.com/anthonator/ar-book-finder) [](https://coveralls.io/r/anthonator/ar-book-finder) [](https://codeclimate.com/github/anthonator/ar-book-finder)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -68,7 +68,7 @@ And publisher details:
|
|
68
68
|
## Usage
|
69
69
|
|
70
70
|
### Quick Search
|
71
|
-
Quick search
|
71
|
+
Quick search will allow you to perform searches based on title, topics, author or ISBN.
|
72
72
|
|
73
73
|
Perform a quick search...
|
74
74
|
```ruby
|
@@ -79,7 +79,7 @@ results.books # Retrieve the books returned on this page
|
|
79
79
|
Load book data on demand using ```#fetch```...
|
80
80
|
``` ruby
|
81
81
|
...
|
82
|
-
book = results[0]
|
82
|
+
book = results.books[0]
|
83
83
|
book.fetch # Retrieve book data
|
84
84
|
book.title
|
85
85
|
book.author
|
@@ -94,11 +94,24 @@ publisher.isbn
|
|
94
94
|
...
|
95
95
|
```
|
96
96
|
|
97
|
-
###
|
98
|
-
|
97
|
+
### Collections
|
98
|
+
Collections retrieve curated book lists. Examples include awards, state lists, etc.
|
99
|
+
|
100
|
+
Retrieve a collection...
|
101
|
+
```ruby
|
102
|
+
results = ARBookFinder.collection({ 'Awards' => 'ALA Notable/Best Books' })
|
103
|
+
```
|
104
|
+
|
105
|
+
Retrieve a collection with a multi-level hash...
|
106
|
+
```ruby
|
107
|
+
results = ARBookFinder.collection({ 'State Lists' => { 'Indiana' => 'IN Young Hoosier Middle Grades Book Award Nominees 2013-2014' } })
|
108
|
+
```
|
109
|
+
|
110
|
+
### Pagination
|
111
|
+
It's also possible to paginate search results.
|
99
112
|
```ruby
|
100
113
|
# Retrieve results for page 2
|
101
|
-
results = ARBookFinder.
|
114
|
+
results = ARBookFinder.search('harry potter', 2)
|
102
115
|
```
|
103
116
|
|
104
117
|
## Contributing
|
data/lib/ar_book_finder.rb
CHANGED
@@ -2,9 +2,11 @@ require 'capybara'
|
|
2
2
|
require 'capybara/poltergeist'
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
|
+
require 'ar_book_finder/constants'
|
5
6
|
require 'ar_book_finder/user_type_processor'
|
6
7
|
require 'ar_book_finder/pagination_processor'
|
7
8
|
require 'ar_book_finder/quick_search_processor'
|
9
|
+
require 'ar_book_finder/collection_processor'
|
8
10
|
require 'ar_book_finder/book_detail_processor'
|
9
11
|
require 'ar_book_finder/search_results_parser'
|
10
12
|
require 'ar_book_finder/book_detail_parser'
|
@@ -24,15 +26,15 @@ module ARBookFinder
|
|
24
26
|
Scraper.new(user_type)
|
25
27
|
end
|
26
28
|
|
27
|
-
def self.search(query, page = 1
|
28
|
-
scraper(options[:user_type]).search(query, page
|
29
|
+
def self.search(query, page = 1)
|
30
|
+
scraper(options[:user_type]).search(query, page)
|
29
31
|
end
|
30
32
|
|
31
33
|
def self.advanced_search(user_type, search_type, params)
|
32
34
|
raise 'Not yet implemented'
|
33
35
|
end
|
34
36
|
|
35
|
-
def self.collection(
|
36
|
-
|
37
|
+
def self.collection(collection, page = 1)
|
38
|
+
scraper(options[:user_type]).collection(collection, page)
|
37
39
|
end
|
38
40
|
end
|
data/lib/ar_book_finder/book.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
module ARBookFinder
|
2
4
|
class Book
|
3
5
|
attr_reader :cover, :title, :author, :summary,
|
@@ -29,12 +31,11 @@ module ARBookFinder
|
|
29
31
|
load_ar_quiz_availability(parsed_data[:ar_quiz_availability])
|
30
32
|
load_topics(parsed_data[:topics])
|
31
33
|
load_series(parsed_data[:series])
|
32
|
-
|
33
|
-
parsed_data[:publishers].each { |p| @publishers << Publisher.new(p) }
|
34
|
+
load_publishers(parsed_data[:publishers])
|
34
35
|
end
|
35
36
|
|
36
37
|
def load_ar_quiz_availability(ar_quiz_availability)
|
37
|
-
@ar_quiz_availability = ar_quiz_availability.split(',').collect { |v| v.strip }
|
38
|
+
@ar_quiz_availability = ar_quiz_availability.split(',').collect { |v| v.strip.gsub(' ', '') }
|
38
39
|
end
|
39
40
|
|
40
41
|
def load_topics(topics)
|
@@ -46,5 +47,9 @@ module ARBookFinder
|
|
46
47
|
def load_series(series)
|
47
48
|
@series = series.split(';').collect { |v| v.strip }
|
48
49
|
end
|
50
|
+
|
51
|
+
def load_publishers(publishers)
|
52
|
+
publishers.each { |p| @publishers << Publisher.new(p) }
|
53
|
+
end
|
49
54
|
end
|
50
55
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module ARBookFinder
|
2
|
+
class CollectionProcessor
|
3
|
+
include Capybara::DSL
|
4
|
+
|
5
|
+
COLLECTIONS_URL = "#{ARBookFinder::BASE_URL}/collections.aspx"
|
6
|
+
|
7
|
+
def initialize(collections)
|
8
|
+
@collections = collections
|
9
|
+
end
|
10
|
+
|
11
|
+
def process
|
12
|
+
unless current_url.downcase == COLLECTIONS_URL
|
13
|
+
visit(COLLECTIONS_URL)
|
14
|
+
end
|
15
|
+
navigate_collection(@collections)
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
def navigate_collection(collection)
|
20
|
+
collection.each do |key, value|
|
21
|
+
click_on(key)
|
22
|
+
if value.kind_of?(Hash)
|
23
|
+
navigate_collection(value)
|
24
|
+
else
|
25
|
+
click_on(value)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -1,27 +1,21 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class PaginationProcessor
|
3
3
|
include Capybara::DSL
|
4
|
+
|
5
|
+
SEARCH_GOTO_FIELD = 'ctl00_ContentPlaceHolder1_ucSeachResults_txtPageToGoToTop'
|
6
|
+
SEARCH_SUBMIT_BUTTON = 'ctl00_ContentPlaceHolder1_ucSeachResults_btnGoToPageTop'
|
7
|
+
|
8
|
+
COLLECTION_GOTO_FIELD = 'ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_txtPageToGoToTop'
|
9
|
+
COLLECTION_SUBMIT_BUTTON = 'ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_btnGoToPageTop'
|
4
10
|
|
5
|
-
|
6
|
-
|
7
|
-
SORT_BY_TYPES = {
|
8
|
-
title: 'Title',
|
9
|
-
author: 'Author',
|
10
|
-
interest_level: 'Interest Level',
|
11
|
-
book_level: 'Book Level',
|
12
|
-
relevance: 'Relevance',
|
13
|
-
rating: 'Rating'
|
14
|
-
}
|
15
|
-
|
16
|
-
def initialize(page, sort_by)
|
11
|
+
def initialize(page, collection = false)
|
17
12
|
@page = page
|
18
|
-
@
|
13
|
+
@field_const = collection ? :COLLECTION : :SEARCH
|
19
14
|
end
|
20
15
|
|
21
16
|
def process
|
22
|
-
fill_in(
|
23
|
-
|
24
|
-
click_button('ctl00_ContentPlaceHolder1_ucSeachResults_btnGoToPageTop')
|
17
|
+
fill_in(self.class.const_get(:"#{@field_const}_GOTO_FIELD"), with: @page)
|
18
|
+
click_button(self.class.const_get(:"#{@field_const}_SUBMIT_BUTTON"))
|
25
19
|
end
|
26
20
|
end
|
27
21
|
end
|
@@ -1,19 +1,19 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class QuickSearchProcessor
|
3
3
|
include Capybara::DSL
|
4
|
+
|
5
|
+
QUICK_SEARCH_URL = "#{ARBookFinder::BASE_URL}/default.aspx"
|
4
6
|
|
5
|
-
def initialize(query
|
7
|
+
def initialize(query)
|
6
8
|
@query = query
|
7
|
-
@page = page
|
8
|
-
@sort_by = sort_by
|
9
9
|
end
|
10
10
|
|
11
11
|
def process
|
12
|
+
unless current_url.downcase == QUICK_SEARCH_URL
|
13
|
+
visit(QUICK_SEARCH_URL)
|
14
|
+
end
|
12
15
|
fill_in('ctl00_ContentPlaceHolder1_txtKeyWords', with: @query)
|
13
16
|
click_button('ctl00_ContentPlaceHolder1_btnDoIt')
|
14
|
-
if @page > 1 || @sort_by != PaginationProcessor::DEFAULT_SORT_BY
|
15
|
-
PaginationProcessor.new(@page, @sort_by).process
|
16
|
-
end
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
@@ -1,13 +1,31 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class Scraper
|
3
3
|
def initialize(user_type)
|
4
|
-
|
4
|
+
UserTypeProcessor.new(user_type).process
|
5
5
|
end
|
6
6
|
|
7
|
-
def search(query, page = 1
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
def search(query, page = 1)
|
8
|
+
QuickSearchProcessor.new(query).process
|
9
|
+
results = SearchResultsParser.new(Capybara.page.html).parse
|
10
|
+
if page > 1
|
11
|
+
results = paginate(page, false)
|
12
|
+
end
|
13
|
+
results
|
14
|
+
end
|
15
|
+
|
16
|
+
def collection(collection, page = 1)
|
17
|
+
CollectionProcessor.new(collection).process
|
18
|
+
results = SearchResultsParser.new(Capybara.page.html, true).parse
|
19
|
+
if page > 1
|
20
|
+
results = paginate(page, true)
|
21
|
+
end
|
22
|
+
results
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
def paginate(page, collection)
|
27
|
+
PaginationProcessor.new(page, collection).process
|
28
|
+
SearchResultsParser.new(Capybara.page.html, collection).parse
|
11
29
|
end
|
12
30
|
end
|
13
31
|
end
|
@@ -1,36 +1,46 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class SearchResultsParser
|
3
|
-
|
4
|
-
|
3
|
+
SEARCH_PAGE_COUNT_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucSeachResults_lblResultsSummaryTop"]'
|
4
|
+
SEARCH_RESULTS_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucSeachResults_lblQuizzes"]/table'
|
5
|
+
|
6
|
+
COLLECTION_PAGE_COUNT_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_lblResultsSummaryTop"]'
|
7
|
+
COLLECTION_RESULTS_XPATH = '//*[@id="ctl00_ContentPlaceHolder1_ucCollection_ucSeachResults_lblQuizzes"]/table'
|
8
|
+
|
5
9
|
BOOK_XPATH = 'tbody/tr/td[2]'
|
6
10
|
BOOK_DETAIL_XPATH = 'table/tbody/tr/td[2]'
|
7
11
|
BOOK_URL_XPATH = 'a'
|
8
12
|
|
9
|
-
attr_reader :page_count, :books
|
13
|
+
attr_reader :current_page, :page_count, :total_books, :books
|
10
14
|
|
11
|
-
def initialize(html)
|
15
|
+
def initialize(html, collection = false)
|
12
16
|
@doc = Nokogiri::HTML.parse(html)
|
17
|
+
@xpath_const = collection ? :COLLECTION : :SEARCH
|
13
18
|
@books = []
|
14
19
|
end
|
15
20
|
|
16
21
|
def parse
|
17
|
-
@
|
22
|
+
@current_page = parse_current_page.to_i
|
23
|
+
@page_count = parse_page_count.to_i
|
18
24
|
@books = parse_results
|
19
25
|
self
|
20
26
|
end
|
21
27
|
|
22
28
|
private
|
29
|
+
def parse_current_page
|
30
|
+
@doc.xpath(self.class.const_get(:"#{@xpath_const}_PAGE_COUNT_XPATH")).text.gsub(/Page /, '').gsub(/ of \d+/, '')
|
31
|
+
end
|
32
|
+
|
23
33
|
def parse_page_count
|
24
|
-
@doc.xpath(
|
34
|
+
@doc.xpath(self.class.const_get(:"#{@xpath_const}_PAGE_COUNT_XPATH")).text.gsub(/Page \d+ of /, '')
|
25
35
|
end
|
26
36
|
|
27
37
|
def parse_results
|
28
38
|
books = []
|
29
|
-
@doc.xpath(
|
39
|
+
@doc.xpath(self.class.const_get(:"#{@xpath_const}_RESULTS_XPATH")).each_with_index do |result, i|
|
30
40
|
next if i.odd?
|
31
41
|
book = result.xpath(BOOK_XPATH)
|
32
42
|
book_detail = book.xpath(BOOK_DETAIL_XPATH)
|
33
|
-
books << Book.new(
|
43
|
+
books << Book.new("#{ARBookFinder::BASE_URL}/#{book_detail.xpath(BOOK_URL_XPATH).attribute('href').content}")
|
34
44
|
end
|
35
45
|
books
|
36
46
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module ARBookFinder
|
2
2
|
class UserTypeProcessor
|
3
3
|
include Capybara::DSL
|
4
|
+
|
5
|
+
USER_TYPE_URL = "#{ARBookFinder::BASE_URL}/usertype.aspx"
|
4
6
|
|
5
7
|
USER_TYPES = {
|
6
8
|
student: 'Student',
|
@@ -14,7 +16,7 @@ module ARBookFinder
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def process
|
17
|
-
visit(
|
19
|
+
visit(USER_TYPE_URL)
|
18
20
|
choose(@user_type)
|
19
21
|
click_button('Submit')
|
20
22
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Collection' do
|
4
|
+
it 'should return results for a single child hash' do
|
5
|
+
results = ARBookFinder.collection('Awards' => 'ALA Notable/Best Books')
|
6
|
+
results.books.size.should > 0
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'should return results for a multi child hash' do
|
10
|
+
results = ARBookFinder.collection('State Lists' => { 'Indiana' => 'IN Young Hoosier Middle Grades Book Award Nominees 2013-2014' })
|
11
|
+
results.books.size.should > 0
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should return results for page 2' do
|
15
|
+
results = ARBookFinder.collection({ 'Awards' => 'ALA Notable/Best Books' }, 2)
|
16
|
+
results.current_page.should == 2
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'should fetch book data' do
|
20
|
+
results = ARBookFinder.collection('Awards' => 'ALA Notable/Best Books')
|
21
|
+
book = results.books[0]
|
22
|
+
book.fetch
|
23
|
+
book.title.should_not be ''
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Search' do
|
4
|
+
it 'should return results for a title search' do
|
5
|
+
results = ARBookFinder.search('harry potter')
|
6
|
+
results.books.size.should > 0
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'should return results for page 2' do
|
10
|
+
results = ARBookFinder.search('harry potter', 2)
|
11
|
+
results.current_page.should == 2
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should fetch book data' do
|
15
|
+
results = ARBookFinder.search('harry potter')
|
16
|
+
book = results.books[0]
|
17
|
+
book.fetch
|
18
|
+
book.title.should_not be ''
|
19
|
+
end
|
20
|
+
end
|
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: ar_book_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.
|
5
|
+
version: 1.1.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Anthony Smith
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-10-
|
12
|
+
date: 2013-10-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: capybara
|
@@ -109,7 +109,9 @@ files:
|
|
109
109
|
- lib/ar_book_finder/book.rb
|
110
110
|
- lib/ar_book_finder/book_detail_parser.rb
|
111
111
|
- lib/ar_book_finder/book_detail_processor.rb
|
112
|
+
- lib/ar_book_finder/collection_processor.rb
|
112
113
|
- lib/ar_book_finder/configuration.rb
|
114
|
+
- lib/ar_book_finder/constants.rb
|
113
115
|
- lib/ar_book_finder/pagination_processor.rb
|
114
116
|
- lib/ar_book_finder/publisher.rb
|
115
117
|
- lib/ar_book_finder/quick_search_processor.rb
|
@@ -118,8 +120,8 @@ files:
|
|
118
120
|
- lib/ar_book_finder/user_type_processor.rb
|
119
121
|
- lib/ar_book_finder/version.rb
|
120
122
|
- spec/.keep
|
121
|
-
- spec/ar_book_finder/
|
122
|
-
- spec/
|
123
|
+
- spec/ar_book_finder/collection_spec.rb
|
124
|
+
- spec/ar_book_finder/search_spec.rb
|
123
125
|
- spec/spec_helper.rb
|
124
126
|
homepage: https://github.com/anthonator/ar-book-finder
|
125
127
|
licenses:
|
@@ -148,6 +150,6 @@ specification_version: 3
|
|
148
150
|
summary: Crawls and parses data on arbookfind.com and returns book data in an easy to use format
|
149
151
|
test_files:
|
150
152
|
- spec/.keep
|
151
|
-
- spec/ar_book_finder/
|
152
|
-
- spec/
|
153
|
+
- spec/ar_book_finder/collection_spec.rb
|
154
|
+
- spec/ar_book_finder/search_spec.rb
|
153
155
|
- spec/spec_helper.rb
|
data/spec/ar_book_finder_spec.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe ARBookFinder do
|
4
|
-
describe 'search' do
|
5
|
-
it 'should return results' do
|
6
|
-
results = ARBookFinder.search('978-0590353403').books
|
7
|
-
results.size.should > 0
|
8
|
-
end
|
9
|
-
|
10
|
-
it 'should paginate and sort results' do
|
11
|
-
results = ARBookFinder.search('harry potter', 2, :title).books
|
12
|
-
results.size.should > 0
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|