amazon_wish_miner 0.0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/amazon_wish_miner.rb +4 -1
- data/lib/amazon_wish_miner/amazon_wish.rb +67 -0
- data/lib/amazon_wish_miner/amazon_wish_list.rb +60 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc6ba9d094477598ed88ae8290160153086fd048ace3c90af8ac107265154e8e
|
4
|
+
data.tar.gz: 0afab73d342d5b7b12c004fbb88e8710ae09f3076fde0d664e627805a0a48463
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55a49aa00e70fd5e5719dc9f169113ecec79a74f666e265be6c2912589f2cbba9ea82443ef8a68f6a51c2dec90b0439fdbfa324f786ccb20605ed4a770ae08b7
|
7
|
+
data.tar.gz: c639720d3b144945655cb3f119ebf3d39203ca2f57d1719fdce2f710496ec1e2976aeb96f551b1c7e2f575ecfb37799aa8c4cfd5c68c6ac1ef481bc042b8bd77
|
data/lib/amazon_wish_miner.rb
CHANGED
@@ -0,0 +1,67 @@
|
|
1
|
+
# A single wish-list item (external id plus title), together with class-level
# helpers that pull item ids out of wish-list page responses and build items
# from their product pages.
class AmazonWish
  attr_reader :title, :id

  # @param id    the item's external id (item_from_id appends it to the
  #              product url)
  # @param title the item's title as scraped from its product page
  def initialize(id, title)
    @title = title
    @id = id
  end

  # Extracts the external item ids from a collection of wish-list page
  # responses.
  #
  # @param page_responses [Enumerable] responses/HTML accepted by Nokogiri::HTML
  # @return [Array<String>] external ids of the items found on those pages
  def self.parse_wishes_from_pages(page_responses)
    list_items = list_items_from_response(page_responses)
    # TODO: feed these ids to wishes_from_ids once item scraping is complete.
    draps_from_list_items(list_items)
  end

  # Collects every `li` found under `ul#g-items` across all page responses.
  #
  # @param page_responses [Enumerable] responses/HTML accepted by Nokogiri::HTML
  # @return [Array] the matching list-item nodes, in page order
  def self.list_items_from_response(page_responses)
    page_responses.flat_map do |response|
      Nokogiri::HTML(response).css('ul#g-items li').to_a
    end
  end

  # Reads the "data-reposition-action-params" attribute ("drap") of each list
  # item and extracts the external item id from it.
  #
  # The selector used in list_items_from_response matches every descendant
  # `li`, so items that lack the attribute (or whose attribute carries no
  # external id) are skipped instead of raising.
  #
  # @param list_items [Enumerable] objects responding to #[] with the
  #   attribute name
  # @return [Array<String>] the external ids that could be extracted
  def self.draps_from_list_items(list_items)
    list_items.map { |li| external_id_from_drap(li['data-reposition-action-params']) }.compact
  end

  # Extracts the external item id from a "drap" string.
  #
  # The id is the part of the "itemExternalId" value that follows the last
  # ':' and precedes the first '|'.
  #
  # @param drap [String, nil] raw attribute value
  # @return [String, nil] the id, or nil when drap is nil or has no
  #   "itemExternalId" entry
  def self.external_id_from_drap(drap)
    return nil if drap.nil?

    external_id = drap[/"itemExternalId"\s*:\s*"([^"]*)"/, 1]
    return nil if external_id.nil?

    external_id.split(':').last.to_s.split('|').first
  end

  # Parsing item info from the item's own url rather than from the wishlist
  # means that item_from_id can be reused to scrape item info.
  #
  # @param ids [Enumerable<String>] external item ids
  # @return [Array<AmazonWish>] one wish per id (one HTTP request each)
  def self.wishes_from_ids(ids)
    ids.map { |id| item_from_id(id) }
  end

  # Fetches the product page for +id+ and builds an AmazonWish from it.
  #
  # @param id [String] external item id
  # @return [AmazonWish]
  def self.item_from_id(id)
    response = RestClient.get("https://www.amazon.com/dp/#{id}")
    page = Nokogiri::HTML(response)
    # "roductTitle" is not a typo: css selectors are case sensitive, and the
    # suffix match needs to capture e.g. both "productTitle" and
    # "ebookProductTitle".
    # NOTE(review): this stores the matched node set, not its text; callers
    # presumably want `title.text.strip` -- confirm before changing the type.
    title = page.css('span[id$="roductTitle"]')
    # price = page.css('priceblock_ourprice')
    # TODO: parse prices
    # description = parse_feature_bullets(page.css('div#feature-bullets'))
    # TODO: get description parsing to work for different types of items
    AmazonWish.new(id, title)
  end

  # @param feature_bullets_div an object responding to #css
  # @return the `ul li` nodes found inside the given element
  def self.parse_feature_bullets(feature_bullets_div)
    feature_bullets_div.css('ul li')
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Downloads every page of an Amazon wish list.
class AmazonWishList
  # Accepted values for the `reveal` argument of get_wishlist.
  REVEAL_OPTIONS = [:all, :purchased, :unpurchased].freeze
  # Maps the `sort` argument of get_wishlist to the value sent in the query
  # string.
  SORT_OPTIONS = {date_added: "date-added", title: 'universal-title',
                  price_high: 'universal-price-desc', price_low: 'universal-price',
                  date_updated: 'last-updated', priority: 'priority'}.freeze

  # TODO: https://www.amazon.com/hz/wishlist/ls/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS,
  #=> https://www.amazon.com/gp/registry/wishlist/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS,
  #=> and https://www.amazon.com/registry/wishlist/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS
  #=> appear to be functionally the same. Code should reflect this when it is
  #=> given links as arguments.

  # Fetches all pages of the wish list with the given id.
  #
  # @param amazon_list_id [String] the list id used in the wish list url
  # @param reveal [Symbol] one of REVEAL_OPTIONS
  # @param sort [Symbol] one of the keys of SORT_OPTIONS
  # @param tld [String] top-level domain of the Amazon site to query
  # @return [Array] one response per wish-list page, in request order
  # @raise [RuntimeError] if reveal or sort is not a known option
  def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
    raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)
    raise "invalid sort" unless SORT_OPTIONS[sort]

    # No :lek entry yet: lek is nil for the first page.
    query_params = {reveal: reveal.to_s, sort_string: SORT_OPTIONS[sort]}
    url_without_qstring = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"

    get_all_wishlist_pages(url_without_qstring, query_params)
  end

  # Requests wish-list pages one after another until the last page is reached.
  #
  # @param url_without_qstring [String] wish list url without a query string
  # @param query_params [Hash] :reveal and :sort_string (and optionally :lek);
  #   the hash passed in is not modified
  # @return [Array] the responses collected so far, last page included
  def self.get_all_wishlist_pages(url_without_qstring, query_params)
    params = query_params.dup # keep the caller's hash free of our :lek updates
    responses = []
    loop do
      response = get_wishlist_page(url_without_qstring, params)
      responses << response
      # As of the time this was written, this phrase appears only on the last
      # page.
      return responses if response.body.include?("Find a gift")

      lek = find_lek_from_response(response)
      # Without a key there is no way to ask for the next page; stop instead
      # of crashing or re-requesting the same page forever.
      return responses if lek.nil? || lek.empty?

      params[:lek] = lek # the rest of the params hash stays the same
    end
  end

  # Performs the HTTP GET for a single wish-list page.
  #
  # @return the RestClient response
  def self.get_wishlist_page(url_without_qstring, query_params)
    RestClient.get(url_without_qstring + page_query_string(query_params))
  end

  # Builds the query string for a wish-list page request.
  #
  # @param query_params [Hash] :reveal, :sort_string and, for every page after
  #   the first, :lek
  # @return [String] query string starting with "?"
  def self.page_query_string(query_params)
    base = "?reveal=#{query_params[:reveal]}&layout=standard&sort=#{query_params[:sort_string]}"
    return base unless query_params[:lek]

    "#{base}&lek=#{query_params[:lek]}&type=wishlist&ajax=true"
  end

  # As of the time of writing this, "lastEvaluatedKey", abbreviated as "lek",
  # is used to keep track of what portions of the wishlist have already been
  # loaded, and is sent in the query string of ajax calls to get the next page.
  #
  # @param response an object whose #body returns the page markup as a String
  # @return [String, nil] the key, or nil when the page does not contain one
  def self.find_lek_from_response(response)
    after_marker = response.body.split('name="lastEvaluatedKey" value="')[1]
    return nil if after_marker.nil?

    after_marker.split('" class="lastEvaluatedKey"')[0]
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amazon_wish_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander V. Trujillo
|
@@ -11,7 +11,7 @@ cert_chain: []
|
|
11
11
|
date: 2018-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rest-client
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.8'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.8'
|
41
41
|
description: Modeled after Amazon Wish Lister http://doitlikejustin.github.io/amazon-wish-lister/
|
42
42
|
email:
|
43
43
|
executables: []
|
@@ -45,6 +45,8 @@ extensions: []
|
|
45
45
|
extra_rdoc_files: []
|
46
46
|
files:
|
47
47
|
- lib/amazon_wish_miner.rb
|
48
|
+
- lib/amazon_wish_miner/amazon_wish.rb
|
49
|
+
- lib/amazon_wish_miner/amazon_wish_list.rb
|
48
50
|
homepage: https://github.com/avtrujillo/amazon_wish_miner
|
49
51
|
licenses:
|
50
52
|
- MIT
|