amazon_wish_miner 0.0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 320443f261d0231425f425704d2990492afedc27820e0a2b78ae42d17d4f8c20
4
- data.tar.gz: 1b9f85e4fe95da69ee320ecc06d1acb96b4a6410f9233ffc2ab925930698eba4
3
+ metadata.gz: dc6ba9d094477598ed88ae8290160153086fd048ace3c90af8ac107265154e8e
4
+ data.tar.gz: 0afab73d342d5b7b12c004fbb88e8710ae09f3076fde0d664e627805a0a48463
5
5
  SHA512:
6
- metadata.gz: 25ca33d4e161e75ccc44a9084b276350f3fec5bc6b85150b9885484229df4641f8c24a98ddf327a98169697cb39504eb34b1ccf5ae32ed1445b92841a888db17
7
- data.tar.gz: f476836203c819b1f1a0f27ae1147aad51f61f88d9f9b93d6019288a8f05b21f80425f4abb53a8af8a6758a5218f046a81e46ad8a99bd7e2570916587a54321d
6
+ metadata.gz: 55a49aa00e70fd5e5719dc9f169113ecec79a74f666e265be6c2912589f2cbba9ea82443ef8a68f6a51c2dec90b0439fdbfa324f786ccb20605ed4a770ae08b7
7
+ data.tar.gz: c639720d3b144945655cb3f119ebf3d39203ca2f57d1719fdce2f710496ec1e2976aeb96f551b1c7e2f575ecfb37799aa8c4cfd5c68c6ac1ef481bc042b8bd77
@@ -1 +1,4 @@
1
- puts 'hello world'
1
+ require 'nokogiri'
2
+ require 'rest-client'
3
+ require_relative 'amazon_wish_miner/amazon_wish_list.rb'
4
+ require_relative 'amazon_wish_miner/amazon_wish.rb'
@@ -0,0 +1,67 @@
1
# A single item taken from an Amazon wish list, plus the class-level helpers
# that scrape item ids out of wish list pages and item info out of item pages.
class AmazonWish

  attr_reader :title, :id

  # @param id [String] external Amazon item id (the part after "/dp/" in an item url)
  # @param title [String, nil] title of the item
  def initialize(id, title)
    @title = title
    @id = id
  end

  # Extracts the external item ids found in the given wish list pages.
  #
  # NOTE: despite its name this currently returns item ids, not AmazonWish
  # objects; pass the result to wishes_from_ids to fetch the items themselves.
  #
  # @param page_responses [Enumerable] raw HTML of each wish list page
  # @return [Array<String>] external item ids, in page order
  def self.parse_wishes_from_pages(page_responses)
    list_items = list_items_from_response(page_responses)
    draps_from_list_items(list_items)
    # TODO: wishes_from_ids(wish_ids)
  end

  # Collects every <li> under ul#g-items from each page.
  #
  # @param page_responses [Enumerable] raw HTML of each wish list page
  # @return [Array] the matching list item nodes from all pages, in order
  def self.list_items_from_response(page_responses)
    page_responses.flat_map do |response|
      Nokogiri::HTML(response).css('ul#g-items li').to_a
    end
  end

  # Reads the "data-reposition-action-params" attribute ("drap") of each list
  # item and extracts the item id from it. List items that lack the attribute,
  # or whose attribute holds no item id, are skipped instead of raising.
  #
  # @param list_items [Enumerable] objects responding to #[] with an attribute name
  # @return [Array<String>] external item ids
  def self.draps_from_list_items(list_items)
    ids = list_items.map do |li|
      external_id_from_drap(li['data-reposition-action-params'])
    end
    ids.compact
  end

  # Pulls the item id out of a drap string. The id is taken from the
  # "itemExternalId" value: everything after the last ':' of the part that
  # precedes the first '|'
  # (presumably the value looks like "ASIN:<id>|<marketplace>" - TODO confirm).
  #
  # @param drap [String, nil] value of the data-reposition-action-params attribute
  # @return [String, nil] the item id, or nil when drap is nil or has no id
  def self.external_id_from_drap(drap)
    return nil if drap.nil?

    match = drap.match(/"itemExternalId"\s*:\s*"([^"]*)"/)
    return nil unless match

    before_pipe = match[1].split('|').first.to_s
    before_pipe.split(':').last
  end

  # parsing item info from the item's own url rather than from the wishlist
  # means that we can reuse the method below to scrape item info

  # @param ids [Enumerable<String>] external item ids
  # @return [Array<AmazonWish>] one wish per id (performs one HTTP request per id)
  def self.wishes_from_ids(ids)
    ids.map { |id| item_from_id(id) }
  end

  # Fetches the item's own page and builds an AmazonWish from it.
  #
  # @param id [String] external item id
  # @return [AmazonWish] wish whose title is the stripped title text, or nil
  #   when no title element is found on the page
  def self.item_from_id(id)
    response = RestClient.get("https://www.amazon.com/dp/#{id}")
    page = Nokogiri::HTML(response)
    # "roductTitle" is not a typo: css selectors are case sensitive, and the
    # suffix match needs to capture e.g. both "productTitle" and "ebookProductTitle"
    title_node = page.at_css('span[id$="roductTitle"]')
    title = title_node ? title_node.text.strip : nil
    # price = page.css('priceblock_ourprice')
    # TODO: parse prices
    # description = parse_feature_bullets(page.css('div#feature-bullets'))
    # TODO: get description parsing to work for different types of items
    AmazonWish.new(id, title)
  end

  # @param feature_bullets_div [#css] node(s) for the feature bullets container
  # @return the <li> nodes found under a <ul> inside the given container
  def self.parse_feature_bullets(feature_bullets_div)
    feature_bullets_div.css('ul li')
  end

end
@@ -0,0 +1,60 @@
1
# Downloads the pages of an Amazon wish list.
class AmazonWishList

  REVEAL_OPTIONS = [:all, :purchased, :unpurchased].freeze
  # Maps the sort option accepted by get_wishlist to the value sent in the
  # "sort" query parameter.
  SORT_OPTIONS = {
    date_added: 'date-added',
    title: 'universal-title',
    price_high: 'universal-price-desc',
    price_low: 'universal-price',
    date_updated: 'last-updated',
    priority: 'priority'
  }.freeze

  # TODO: https://www.amazon.com/hz/wishlist/ls/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS,
  # https://www.amazon.com/gp/registry/wishlist/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS,
  # and https://www.amazon.com/registry/wishlist/2WHUDN1UIDVUT/ref=cm_sw_r_cp_ep_ws_8xNVBb731TTMS
  # appear to be functionally the same. Code should reflect this when it is
  # given links as arguments.

  # Fetches every page of a wish list.
  #
  # @param amazon_list_id [String] id of the list as it appears in the list url
  # @param reveal [Symbol] one of REVEAL_OPTIONS
  # @param sort [Symbol] one of the keys of SORT_OPTIONS
  # @param tld [String] top level domain of the Amazon site to query
  # @return [Array] the response of each page request, in order
  # @raise [RuntimeError] "invalid reveal" / "invalid sort" on unknown options
  def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
    raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)
    raise "invalid sort" unless SORT_OPTIONS.key?(sort)

    # lek is absent for the first page
    query_params = { reveal: reveal.to_s, sort_string: SORT_OPTIONS[sort] }
    url_without_qstring = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"

    get_all_wishlist_pages(url_without_qstring, query_params)
  end

  # Requests pages until the last one is reached, following the
  # "lastEvaluatedKey" (lek) found in each response.
  #
  # @param url_without_qstring [String] wish list url without a query string
  # @param query_params [Hash] :reveal and :sort_string, optionally :lek;
  #   the hash passed in is not modified
  # @return [Array] the response of each page request, in order
  def self.get_all_wishlist_pages(url_without_qstring, query_params)
    params = query_params.dup # keep the caller's hash untouched
    responses = []
    loop do
      response = get_wishlist_page(url_without_qstring, params)
      responses << response
      # as of the time this was written, this phrase appears only on the last page
      break if response.body.include?("Find a gift")

      lek = find_lek_from_response(response)
      # without a new key there is no next page to ask for; stopping here also
      # avoids requesting the same page forever
      break if lek.nil? || lek.empty? || lek == params[:lek]

      params[:lek] = lek # the rest of the params hash stays the same
    end
    responses
  end

  # Performs the HTTP GET for a single page.
  #
  # @param url_without_qstring [String] wish list url without a query string
  # @param query_params [Hash] see page_query_string
  # @return the RestClient response
  def self.get_wishlist_page(url_without_qstring, query_params)
    RestClient.get(url_without_qstring + page_query_string(query_params))
  end

  # Builds the query string for a page request. The lek/type/ajax parameters
  # are only added when query_params[:lek] is set.
  #
  # @param query_params [Hash] :reveal, :sort_string and optionally :lek
  # @return [String] query string starting with "?"
  def self.page_query_string(query_params)
    base = "?reveal=#{query_params[:reveal]}&layout=standard&sort=#{query_params[:sort_string]}"
    return base unless query_params[:lek]

    "#{base}&lek=#{query_params[:lek]}&type=wishlist&ajax=true"
  end

  # As of the time of writing this, "lastEvaluatedKey", abbreviated as "lek",
  # is used to keep track of what portions of the wishlist have already been
  # loaded, and is sent in the query string of ajax calls to get the next page.
  #
  # @param response [#body] response whose body is the page HTML
  # @return [String, nil] the key, or nil when the page does not contain one
  def self.find_lek_from_response(response)
    after_marker = response.body.split('name="lastEvaluatedKey" value="')[1]
    return nil if after_marker.nil?

    after_marker.split('" class="lastEvaluatedKey"')[0]
  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amazon_wish_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander V. Trujillo
@@ -11,7 +11,7 @@ cert_chain: []
11
11
  date: 2018-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: HTTParty
14
+ name: rest-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2'
33
+ version: '1.8'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2'
40
+ version: '1.8'
41
41
  description: Modeled after Amazon Wish Lister http://doitlikejustin.github.io/amazon-wish-lister/
42
42
  email:
43
43
  executables: []
@@ -45,6 +45,8 @@ extensions: []
45
45
  extra_rdoc_files: []
46
46
  files:
47
47
  - lib/amazon_wish_miner.rb
48
+ - lib/amazon_wish_miner/amazon_wish.rb
49
+ - lib/amazon_wish_miner/amazon_wish_list.rb
48
50
  homepage: https://github.com/avtrujillo/amazon_wish_miner
49
51
  licenses:
50
52
  - MIT