amazon_wish_miner 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/amazon_wish_miner/amazon_wish.rb +15 -15
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69edf4f7a461874e9fa55c132c82f73294f881dc962b0ee346e1bd42226d0cbb
|
4
|
+
data.tar.gz: 9fc28366f6057787eb6f9873ffb579348ce1381562e4df89b63fe063c67fdcd7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13f62b3068d17cd1a1bcecb466add6850d987140a60ad37691e7c3e43cc79f4d7f7f0d3e79b633e3959fc339863e99a20c8ef78cb47b9c2a805fb54beb1dcd2c
|
7
|
+
data.tar.gz: d5171d19b06dcb0a7668728b739e6ab9490ae081d63764f662a7f1b02449c2bef4a2b35e7f508fa2162bc45d7e6f38ded69d279ee3510c411dc7bed4ff477133
|
@@ -1,20 +1,20 @@
|
|
1
1
|
class AmazonWish
|
2
2
|
|
3
|
-
attr_reader :title, :
|
3
|
+
attr_reader :title, :asin
|
4
4
|
|
5
|
-
def initialize(
|
5
|
+
def initialize(asin, title)
|
6
6
|
@title = title
|
7
|
-
@
|
7
|
+
@asin = asin
|
8
8
|
end
|
9
9
|
|
10
10
|
def url
|
11
|
-
"https://www.amazon.com/dp/#{@
|
11
|
+
"https://www.amazon.com/dp/#{@asin}"
|
12
12
|
end
|
13
13
|
|
14
14
|
def self.parse_wishes_from_pages(page_responses)
|
15
15
|
list_items = self.list_items_from_response(page_responses)
|
16
|
-
|
17
|
-
|
16
|
+
wish_asins = self.draps_from_list_items(list_items)
|
17
|
+
wishes_from_asins(wish_asins)
|
18
18
|
end
|
19
19
|
|
20
20
|
def self.list_items_from_response(page_responses)
|
@@ -27,13 +27,13 @@ class AmazonWish
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.draps_from_list_items(list_items)
|
30
|
-
list_items.each_with_object(Array.new) do |li,
|
30
|
+
list_items.each_with_object(Array.new) do |li, wish_asins|
|
31
31
|
drap = li['data-reposition-action-params']
|
32
|
-
|
32
|
+
wish_asins << external_id_from_drap(drap)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
def self.external_id_from_drap(drap)
|
36
|
+
def self.external_id_from_drap(drap) # the page refers to the ASIN as "itemExternalId"
|
37
37
|
attrs = drap.split(',')
|
38
38
|
attr_substrings = attrs.map { |elem| elem.split(':') }
|
39
39
|
ied_attr = attr_substrings.find { |ss| ss.include?("{\"itemExternalId\"")}
|
@@ -45,14 +45,14 @@ class AmazonWish
|
|
45
45
|
# parsing item info from the item's own url rather than from the wishlist
|
46
46
|
#=> means that we can reuse the method below to scrape item info
|
47
47
|
|
48
|
-
def self.
|
49
|
-
|
50
|
-
self.
|
48
|
+
def self.wishes_from_asins(asins)
|
49
|
+
asins.map do |asin_elem|
|
50
|
+
self.item_from_asin(asin_elem)
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
-
def self.
|
55
|
-
item_url = 'https://www.amazon.com/dp/' +
|
54
|
+
def self.item_from_asin(asin_arg)
|
55
|
+
item_url = 'https://www.amazon.com/dp/' + asin_arg
|
56
56
|
response = RestClient.get(item_url)
|
57
57
|
page = Nokogiri::HTML(response)
|
58
58
|
title = get_title_from_page(page)
|
@@ -62,7 +62,7 @@ class AmazonWish
|
|
62
62
|
# TODO: parse prices
|
63
63
|
# description = parse_feature_bullets(page.css('div#feature-bullets'))
|
64
64
|
# TODO: get description parsing to work for different types of items
|
65
|
-
AmazonWish.new(
|
65
|
+
AmazonWish.new(asin_arg, title)
|
66
66
|
end
|
67
67
|
|
68
68
|
def self.parse_feature_bullets(feature_bullets_div)
|