amazon_wish_miner 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/amazon_wish_miner.rb +4 -1
- data/lib/amazon_wish_miner/amazon_wish.rb +67 -0
- data/lib/amazon_wish_miner/amazon_wish_list.rb +60 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc6ba9d094477598ed88ae8290160153086fd048ace3c90af8ac107265154e8e
|
4
|
+
data.tar.gz: 0afab73d342d5b7b12c004fbb88e8710ae09f3076fde0d664e627805a0a48463
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55a49aa00e70fd5e5719dc9f169113ecec79a74f666e265be6c2912589f2cbba9ea82443ef8a68f6a51c2dec90b0439fdbfa324f786ccb20605ed4a770ae08b7
|
7
|
+
data.tar.gz: c639720d3b144945655cb3f119ebf3d39203ca2f57d1719fdce2f710496ec1e2976aeb96f551b1c7e2f575ecfb37799aa8c4cfd5c68c6ac1ef481bc042b8bd77
|
data/lib/amazon_wish_miner.rb
CHANGED
@@ -0,0 +1,67 @@
|
|
1
|
+
# A single item from an Amazon wish list, plus class-level helpers that pull
# item ids out of wishlist page responses and build wishes from those ids.
class AmazonWish

  attr_reader :title, :id

  # @param id [String] the item's external id, as produced by
  #   {.external_id_from_drap}
  # @param title [Object] the title value parsed for the item
  def initialize(id, title)
    @title = title
    @id = id
  end

  # Extracts the external ids of every wish found in the given wishlist pages.
  #
  # @param page_responses [Enumerable] wishlist page responses (anything
  #   Nokogiri::HTML accepts)
  # @return [Array<String>] the external ids, in page order
  def self.parse_wishes_from_pages(page_responses)
    list_items = list_items_from_response(page_responses)
    draps_from_list_items(list_items)
    # TODO: return wishes_from_ids(wish_ids) once item parsing is complete
  end

  # Collects every `ul#g-items li` element from each page response.
  #
  # @param page_responses [Enumerable] wishlist page responses
  # @return [Array] the matched list-item nodes from all pages, in order
  def self.list_items_from_response(page_responses)
    page_responses.each_with_object([]) do |response, list_items|
      page = Nokogiri::HTML(response)
      page.css('ul#g-items li').each { |li| list_items << li }
    end
  end

  # Reads the "data-reposition-action-params" attribute ("drap") of each list
  # item and extracts the external id from it.
  #
  # Items that have no such attribute, or whose attribute carries no
  # "itemExternalId" entry, are skipped rather than raising.
  #
  # @param list_items [Enumerable<#[]>] list-item nodes
  # @return [Array<String>] external ids of the items that had one
  def self.draps_from_list_items(list_items)
    ids = list_items.map do |li|
      external_id_from_drap(li['data-reposition-action-params'])
    end
    ids.compact
  end

  # Extracts the external id from a drap string.
  #
  # The drap is split on "," and then ":"; the entry whose first piece is
  # `{"itemExternalId"` is located, and the id is the text of its last piece
  # up to the first "|".
  #
  # @param drap [String, nil] the raw attribute value
  # @return [String, nil] the external id, or nil when drap is nil or has no
  #   `{"itemExternalId"` entry
  def self.external_id_from_drap(drap)
    return nil if drap.nil?

    attr_substrings = drap.split(',').map { |elem| elem.split(':') }
    ied_attr = attr_substrings.find { |ss| ss.include?("{\"itemExternalId\"") }
    return nil if ied_attr.nil?

    ied_attr.last.split('|').first
  end

  # parsing item info from the item's own url rather than from the wishlist
  #=> means that we can reuse the method below to scrape item info

  # Builds a wish for each id by fetching that item's own page.
  #
  # @param ids [Enumerable<String>] external ids
  # @return [Array<AmazonWish>]
  def self.wishes_from_ids(ids)
    ids.map { |id| item_from_id(id) }
  end

  # Fetches https://www.amazon.com/dp/<id> and builds a wish from it.
  #
  # @param id [String] external id of the item
  # @return [AmazonWish]
  def self.item_from_id(id)
    item_url = "https://www.amazon.com/dp/#{id}"
    response = RestClient.get(item_url)
    page = Nokogiri::HTML(response)
    # "roductTitle" is not a typo: css selectors are case sensitive, and we
    # need to capture e.g. both "productTitle" and "ebookProductTitle".
    # NOTE(review): the result of `page.css` is stored as-is; presumably this
    # is a node set rather than a String - confirm what callers expect.
    title = page.css('span[id$="roductTitle"]')
    # price = page.css('priceblock_ourprice')
    # TODO: parse prices
    # description = parse_feature_bullets(page.css('div#feature-bullets'))
    # TODO: get description parsing to work for different types of items
    AmazonWish.new(id, title)
  end

  # Returns the `ul li` elements inside the given feature-bullets container.
  #
  # @param feature_bullets_div [#css] the feature-bullets node(s)
  # @return [Object] whatever `css('ul li')` returns for that node
  def self.parse_feature_bullets(feature_bullets_div)
    feature_bullets_div.css('ul li')
  end

end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Fetches every page of an Amazon wish list over HTTP.
class AmazonWishList

  # Accepted values for the `reveal` argument of {.get_wishlist}.
  REVEAL_OPTIONS = [:all, :purchased, :unpurchased].freeze

  # Maps the `sort` argument of {.get_wishlist} to the value sent in the
  # "sort" query parameter.
  SORT_OPTIONS = {
    date_added: "date-added",
    title: 'universal-title',
    price_high: 'universal-price-desc',
    price_low: 'universal-price',
    date_updated: 'last-updated',
    priority: 'priority'
  }.freeze

  def initialize
  end

  # TODO: https://www.amazon.com/hz/wishlist/ls/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS,
  #=> https://www.amazon.com/gp/registry/wishlist/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS,
  #=> and https://www.amazon.com/registry/wishlist/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS
  #=> appear to be functionally the same. Code should reflect this when it is
  #=> given links as arguments.

  # Fetches all pages of the wish list with the given id.
  #
  # @param amazon_list_id [String] the list id used in the wishlist URL
  # @param reveal [Symbol] one of REVEAL_OPTIONS
  # @param sort [Symbol] one of the keys of SORT_OPTIONS
  # @param tld [String] top-level domain of the Amazon site to query
  # @return [Array] the page responses, in the order they were fetched
  # @raise [RuntimeError] if `reveal` or `sort` is not a recognised option
  def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
    raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)
    raise "invalid sort" unless SORT_OPTIONS[sort]

    # lek is nil for the first page
    query_params = { reveal: reveal.to_s, sort_string: SORT_OPTIONS[sort] }
    # https, matching the item-page URL used elsewhere in this gem
    url_without_qstring = "https://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"

    get_all_wishlist_pages(url_without_qstring, query_params)
  end

  # Requests wishlist pages one after another until the last page is reached.
  #
  # The last page is recognised by the phrase "Find a gift", which (as of the
  # time this was written) appears only there. Paging also stops when a page
  # yields no new lastEvaluatedKey, since requesting again would just fetch
  # the same page forever.
  #
  # @param url_without_qstring [String] wishlist URL without a query string
  # @param query_params [Hash] :reveal and :sort_string (and optionally :lek);
  #   the hash passed in is not modified
  # @return [Array] the page responses, in the order they were fetched
  def self.get_all_wishlist_pages(url_without_qstring, query_params)
    params = query_params.dup # keep the caller's hash untouched
    responses = []
    loop do
      response = get_wishlist_page(url_without_qstring, params)
      responses << response
      return responses if response.body.include?("Find a gift")

      lek = find_lek_from_response(response)
      # no key, or the same key again, means there is nothing further to load
      return responses if lek.nil? || lek.empty? || lek == params[:lek]

      params[:lek] = lek # the rest of the params hash stays the same
    end
  end

  # Performs the GET request for a single wishlist page.
  #
  # @param url_without_qstring [String] wishlist URL without a query string
  # @param query_params [Hash] see {.page_query_string}
  # @return [Object] the RestClient response
  def self.get_wishlist_page(url_without_qstring, query_params)
    RestClient.get(url_without_qstring + page_query_string(query_params))
  end

  # Builds the query string for a wishlist page request.
  #
  # @param query_params [Hash] :reveal, :sort_string and optionally :lek
  # @return [String] query string starting with "?"; the lek/type/ajax
  #   parameters are appended only when :lek is set
  def self.page_query_string(query_params)
    base = "?reveal=#{query_params[:reveal]}&layout=standard&sort=#{query_params[:sort_string]}"
    lek = query_params[:lek]
    lek ? "#{base}&lek=#{lek}&type=wishlist&ajax=true" : base
  end

  # Extracts the "lastEvaluatedKey" value from a page response.
  #
  # As of the time of writing this, "lastEvaluatedKey", abbreviated as "lek",
  # is used to keep track of what portions of the wishlist have already been
  # loaded, and is sent in the query string of ajax calls to get the next page.
  #
  # @param response [#body] a page response
  # @return [String, nil] the key, or nil when the page does not contain one
  def self.find_lek_from_response(response)
    start_of_lek = response.body.split('name="lastEvaluatedKey" value="')[1]
    return nil if start_of_lek.nil?

    start_of_lek.split('" class="lastEvaluatedKey"')[0]
  end

end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amazon_wish_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander V. Trujillo
|
@@ -11,7 +11,7 @@ cert_chain: []
|
|
11
11
|
date: 2018-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rest-client
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.8'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.8'
|
41
41
|
description: Modeled after Amazon Wish Lister http://doitlikejustin.github.io/amazon-wish-lister/
|
42
42
|
email:
|
43
43
|
executables: []
|
@@ -45,6 +45,8 @@ extensions: []
|
|
45
45
|
extra_rdoc_files: []
|
46
46
|
files:
|
47
47
|
- lib/amazon_wish_miner.rb
|
48
|
+
- lib/amazon_wish_miner/amazon_wish.rb
|
49
|
+
- lib/amazon_wish_miner/amazon_wish_list.rb
|
48
50
|
homepage: https://github.com/avtrujillo/amazon_wish_miner
|
49
51
|
licenses:
|
50
52
|
- MIT
|