amazon_wish_miner 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 320443f261d0231425f425704d2990492afedc27820e0a2b78ae42d17d4f8c20
4
- data.tar.gz: 1b9f85e4fe95da69ee320ecc06d1acb96b4a6410f9233ffc2ab925930698eba4
3
+ metadata.gz: dc6ba9d094477598ed88ae8290160153086fd048ace3c90af8ac107265154e8e
4
+ data.tar.gz: 0afab73d342d5b7b12c004fbb88e8710ae09f3076fde0d664e627805a0a48463
5
5
  SHA512:
6
- metadata.gz: 25ca33d4e161e75ccc44a9084b276350f3fec5bc6b85150b9885484229df4641f8c24a98ddf327a98169697cb39504eb34b1ccf5ae32ed1445b92841a888db17
7
- data.tar.gz: f476836203c819b1f1a0f27ae1147aad51f61f88d9f9b93d6019288a8f05b21f80425f4abb53a8af8a6758a5218f046a81e46ad8a99bd7e2570916587a54321d
6
+ metadata.gz: 55a49aa00e70fd5e5719dc9f169113ecec79a74f666e265be6c2912589f2cbba9ea82443ef8a68f6a51c2dec90b0439fdbfa324f786ccb20605ed4a770ae08b7
7
+ data.tar.gz: c639720d3b144945655cb3f119ebf3d39203ca2f57d1719fdce2f710496ec1e2976aeb96f551b1c7e2f575ecfb37799aa8c4cfd5c68c6ac1ef481bc042b8bd77
@@ -1 +1,4 @@
1
- puts 'hello world'
1
+ require 'nokogiri'
2
+ require 'rest-client'
3
+ require_relative 'amazon_wish_miner/amazon_wish_list.rb'
4
+ require_relative 'amazon_wish_miner/amazon_wish.rb'
@@ -0,0 +1,67 @@
1
# A single item scraped from an Amazon wish list.
#
# Instances carry the item's external id (the value used in
# https://www.amazon.com/dp/<id> URLs) and its title. The class-level helpers
# turn raw wish-list page responses into item ids, and item ids into wishes.
class AmazonWish

  # Attribute on each wish-list <li> whose value embeds the item's external id.
  DRAP_ATTRIBUTE = 'data-reposition-action-params'.freeze

  # Captures the id portion of an "itemExternalId" entry, e.g. the
  # "B00EXAMPLE" in  "itemExternalId":"ASIN:B00EXAMPLE|ATVPDKIKX0DER".
  # An optional "PREFIX:" before the id and a "|suffix" after it are skipped.
  EXTERNAL_ID_PATTERN = /"itemExternalId"\s*:\s*"(?:[^"|]*:)?([^"|]+)/

  attr_reader :title, :id

  # @param id [String] the item's external id
  # @param title [String, nil] the item's title
  def initialize(id, title)
    @title = title
    @id = id
  end

  # Extracts the external item ids found in the given wish-list pages.
  #
  # NOTE(review): despite the name this returns ids, not AmazonWish objects;
  # the original left the conversion step (wishes_from_ids) commented out,
  # presumably because it costs one HTTP request per item. That behavior is
  # kept as-is here.
  #
  # @param page_responses [Enumerable] HTML of each wish-list page
  # @return [Array<String>] external item ids
  def self.parse_wishes_from_pages(page_responses)
    list_items = list_items_from_response(page_responses)
    draps_from_list_items(list_items)
    # wishes_from_ids(wish_ids)
  end

  # Collects every <li> under ul#g-items from each page.
  #
  # @param page_responses [Enumerable] HTML of each wish-list page
  # @return [Array] the matching list-item nodes, in page order
  def self.list_items_from_response(page_responses)
    page_responses.flat_map do |response|
      Nokogiri::HTML(response).css('ul#g-items li').to_a
    end
  end

  # Reads the reposition-action-params attribute of each list item and
  # extracts the external id from it. List items without the attribute, or
  # without an id in it, are skipped (the 'ul#g-items li' selector also
  # matches nested <li> elements that carry no such attribute).
  #
  # @param list_items [Enumerable<#[]>] list-item nodes
  # @return [Array<String>] external item ids
  def self.draps_from_list_items(list_items)
    list_items.map { |li| external_id_from_drap(li[DRAP_ATTRIBUTE]) }.compact
  end

  # Pulls the item id out of a reposition-action-params attribute value.
  #
  # @param drap [String, nil] the raw attribute value
  # @return [String, nil] the external id, or nil when none is present
  def self.external_id_from_drap(drap)
    return nil if drap.nil?

    drap.to_s[EXTERNAL_ID_PATTERN, 1]
  end

  # parsing item info from the item's own url rather than from the wishlist
  # means that we can reuse the method below to scrape item info

  # Builds a wish for each id; performs one HTTP request per id.
  #
  # @param ids [Enumerable<String>] external item ids
  # @return [Array<AmazonWish>]
  def self.wishes_from_ids(ids)
    ids.map { |id| item_from_id(id) }
  end

  # Fetches the item's product page and builds a wish from it.
  #
  # @param id [String] external item id
  # @return [AmazonWish] wish whose title is the stripped text of the product
  #   title element, or nil when the page has no such element
  def self.item_from_id(id)
    response = RestClient.get("https://www.amazon.com/dp/#{id}")
    page = Nokogiri::HTML(response)
    # "roductTitle" is not a typo: css selectors are case sensitive, and we
    # need to capture e.g. both "productTitle" and "ebookProductTitle"
    title_node = page.at_css('span[id$="roductTitle"]')
    # Store the title text rather than the matched node.
    title = title_node ? title_node.text.strip : nil
    # price = page.css('priceblock_ourprice')
    # TODO: parse prices
    # description = parse_feature_bullets(page.css('div#feature-bullets'))
    # TODO: get description parsing to work for different types of items
    new(id, title)
  end

  # @param feature_bullets_div [#css] the div#feature-bullets node (set)
  # @return the <li> nodes nested in the div's <ul> elements
  def self.parse_feature_bullets(feature_bullets_div)
    feature_bullets_div.css('ul li')
  end

end
@@ -0,0 +1,60 @@
1
# Fetches the raw pages of a public Amazon wish list.
#
# A wish list is loaded page by page: every page except the last embeds a
# "lastEvaluatedKey" (lek) value that must be sent back in the query string to
# obtain the next page.
class AmazonWishList

  REVEAL_OPTIONS = [:all, :purchased, :unpurchased].freeze
  SORT_OPTIONS = {date_added: "date-added", title: 'universal-title',
    price_high: 'universal-price-desc', price_low: 'universal-price',
    date_updated: 'last-updated', priority: 'priority'}.freeze

  # As of the time this was written, this phrase appears only on the last page.
  LAST_PAGE_MARKER = 'Find a gift'.freeze
  # Text surrounding the lek value inside a page body.
  LEK_PREFIX = 'name="lastEvaluatedKey" value="'.freeze
  LEK_SUFFIX = '" class="lastEvaluatedKey"'.freeze

  def initialize
  end

  # TODO: https://www.amazon.com/hz/wishlist/ls/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS,
  # https://www.amazon.com/gp/registry/wishlist/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS,
  # and https://www.amazon.com/registry/wishlist/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS
  # appear to be functionally the same. Code should reflect this when it is
  # given links as arguments.

  # Downloads every page of the given wish list.
  #
  # @param amazon_list_id [String] the list id as it appears in the list's URL
  # @param reveal [Symbol] one of REVEAL_OPTIONS
  # @param sort [Symbol] one of the SORT_OPTIONS keys
  # @param tld [String] Amazon top-level domain, e.g. 'com'
  # @return [Array] the HTTP responses, one per page, in page order
  # @raise [RuntimeError] when reveal or sort is not a supported option
  def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
    raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)
    raise "invalid sort" unless SORT_OPTIONS[sort]

    # lek is nil for the first page, so it is not part of the initial params
    query_params = {reveal: reveal.to_s, sort_string: SORT_OPTIONS[sort]}
    # https, consistent with the item URLs built by AmazonWish.item_from_id
    url_without_qstring = "https://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"

    get_all_wishlist_pages(url_without_qstring, query_params)
  end

  # Requests pages until the last one is reached.
  #
  # Stops when a page contains LAST_PAGE_MARKER, and also when a page yields
  # no lek or repeats the lek just used, so a changed page layout cannot cause
  # a crash or an endless request loop. The caller's hash is not modified.
  #
  # @param url_without_qstring [String] wish-list URL without a query string
  # @param query_params [Hash] :reveal and :sort_string (and optionally :lek)
  # @return [Array] the HTTP responses collected so far, in page order
  def self.get_all_wishlist_pages(url_without_qstring, query_params)
    params = query_params.dup # only :lek changes between requests
    responses = []
    loop do
      response = get_wishlist_page(url_without_qstring, params)
      responses << response
      return responses if response.body.include?(LAST_PAGE_MARKER)

      lek = find_lek_from_response(response)
      return responses if lek.nil? || lek == params[:lek]

      params[:lek] = lek
    end
  end

  # Performs the GET request for a single page.
  #
  # @param url_without_qstring [String] wish-list URL without a query string
  # @param query_params [Hash] see page_query_string
  # @return the RestClient response
  def self.get_wishlist_page(url_without_qstring, query_params)
    RestClient.get(url_without_qstring + page_query_string(query_params))
  end

  # Builds the query string for a page request. The ajax-specific parameters
  # are appended only when a lek is present, i.e. for every page but the first.
  #
  # @param query_params [Hash] :reveal, :sort_string and optionally :lek
  # @return [String] query string including the leading "?"
  def self.page_query_string(query_params)
    base = "?reveal=#{query_params[:reveal]}&layout=standard&sort=#{query_params[:sort_string]}"
    lek = query_params[:lek]
    lek ? "#{base}&lek=#{lek}&type=wishlist&ajax=true" : base
  end

  # As of the time of writing this, "lastEvaluatedKey", abbreviated as "lek",
  # is used to keep track of what portions of the wishlist have already been
  # loaded, and is sent in the query string of ajax calls to get the next page.
  #
  # @param response [#body] a page response
  # @return [String, nil] the lek embedded in the page, or nil when the page
  #   contains no (non-empty) lek
  def self.find_lek_from_response(response)
    _before, prefix, rest = response.body.partition(LEK_PREFIX)
    return nil if prefix.empty? # marker not present on this page

    lek = rest.partition(LEK_SUFFIX).first
    lek.empty? ? nil : lek
  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amazon_wish_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander V. Trujillo
@@ -11,7 +11,7 @@ cert_chain: []
11
11
  date: 2018-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: HTTParty
14
+ name: rest-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2'
33
+ version: '1.8'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2'
40
+ version: '1.8'
41
41
  description: Modeled after Amazon Wish Lister http://doitlikejustin.github.io/amazon-wish-lister/
42
42
  email:
43
43
  executables: []
@@ -45,6 +45,8 @@ extensions: []
45
45
  extra_rdoc_files: []
46
46
  files:
47
47
  - lib/amazon_wish_miner.rb
48
+ - lib/amazon_wish_miner/amazon_wish.rb
49
+ - lib/amazon_wish_miner/amazon_wish_list.rb
48
50
  homepage: https://github.com/avtrujillo/amazon_wish_miner
49
51
  licenses:
50
52
  - MIT