amazon-search 1.1.10 → 1.1.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Readme.rdoc +1 -1
- data/amazon-search.gemspec +25 -0
- data/amazon-search.rb +93 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c35d7c9c08987d534c8430d25e91206c9ba98571
|
4
|
+
data.tar.gz: 7d26df4b2233e9b6bd47a521eb50be6977eb62b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec092cf9867ff4bda6a324936cbf557a6e0cbee74ccd2d96780ad773c092f9e130d927e341bf049c8a6f0fe4f5e996f877f6a9652554db62bad39f8a5f95d32e
|
7
|
+
data.tar.gz: f0396b0589455cdc8615f485f557192957ccbe9e7bb0002a22b47e2c7fffd708cd390dfd01b87ffe7a88f54de0df10da816b0069271edee1f60d34cdf110ef93
|
data/Readme.rdoc
CHANGED
data/amazon-search.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Gem specification for amazon-search: package metadata, the file list
# (taken from git), and the runtime dependency on mechanize.
Gem::Specification.new do |gem|
  gem.name     = %q{amazon-search}
  gem.version  = '1.1.11'
  gem.date     = '2015-09-18'
  gem.platform = Gem::Platform::RUBY

  # The library source uses Ruby 1.9 hash-literal syntax (`key: value`),
  # which Ruby 1.8 cannot parse, so 1.8 must not be advertised as supported.
  gem.required_ruby_version = '>= 1.9'

  # Package whatever git tracks; this requires building from a git checkout.
  gem.files      = `git ls-files`.split("\n")
  gem.test_files = `git ls-files -- test/*`.split("\n")

  gem.summary     = "A simple screenscraper to search Amazon"
  gem.description = "Simple screenscraper to search Amazon and return product titles, urls, image href, etc."
  gem.authors     = ["John Mason"]
  gem.email       = 'mace2345@gmail.com'
  gem.homepage    = 'https://github.com/m8ss/amazon-search'
  gem.license     = 'MIT'

  gem.add_runtime_dependency('mechanize', '~> 2.7')
end
|
23
|
+
|
24
|
+
|
25
|
+
|
data/amazon-search.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'mechanize'
|
4
|
+
|
5
|
+
module Amazon
  # Screen-scrapes Amazon search results with Mechanize and prints a short
  # summary of every product found.
  #
  # Usage:
  #   Amazon::Search.find_products("ruby book")  # collect the result nodes
  #   Amazon::Search.display_results             # print them
  class Search
    HOME_URL = "http://amazon.com".freeze

    # Element whose text is read as the number of the last results page.
    LAST_PAGE_XPATH = '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnDisabled", " " ))]'.freeze

    # One <li> per search result.
    RESULT_XPATH = '//li[starts-with(@id, "result")]'.freeze

    # Printed in place of any optional detail that is missing from a result.
    UNKNOWN = "Unknown".freeze

    class << self
      # Flat Array of product nodes gathered by the latest find_products call
      # (nil until find_products has been run).
      attr_reader :product_divs
    end

    # Submits +keywords+ through Amazon's site search form, walks every page
    # of results and stores each product node in +product_divs+.
    #
    # keywords - String typed into the search box.
    #
    # Prints an end-of-results marker and returns nil.
    # Raises RuntimeError when the search form cannot be found on the home page.
    def self.find_products(keywords)
      #--------- submit the search form with keywords ---------------------
      agent = Mechanize.new
      main_page = agent.get(HOME_URL)
      search_form = main_page.form_with(:name => "site-search")
      raise "Amazon search form (site-search) not found" unless search_form

      search_form.field_with(:name => "field-keywords").value = keywords
      page = agent.submit(search_form) # the submitted form lands on results page 1

      #--------- scan each page and store the results ---------------------
      @product_divs = []

      # With no pagination element the text is empty and to_i gives 0;
      # the page we are already on must still be scanned, hence the floor of 1.
      last_page_num = [page.search(LAST_PAGE_XPATH).text.to_i, 1].max

      1.upto(last_page_num) do |page_num|
        # concat keeps the list flat: one entry per product, not one per page
        @product_divs.concat(page.search(RESULT_XPATH).to_a)
        break if page_num == last_page_num

        next_page_link = page.link_with(text: /Next Page/)
        break unless next_page_link # fewer pages than advertised: stop quietly

        page = next_page_link.click
      end

      puts "\n\n(end of search results)"
    end

    # Prints title, seller, price, stars, reviews, image url and product url
    # for every product stored by find_products.
    #
    # Results lacking a title, price or star rating are skipped (they are
    # most likely ads or related-item panels). Any other missing detail is
    # printed as "Unknown". Does nothing if find_products has not been run.
    def self.display_results
      Array(@product_divs).each do |product|
        #--------- required details: skip the result when any is absent ----
        title = product.at_css(".s-access-title")
        price = product.at_css(".s-price")
        stars = product.at_css(".a-icon-star")
        next unless title && price && stars

        #--------- optional details ---------------------------------------
        seller  = product.at_css(".a-row > .a-spacing-none")
        reviews = product.at_css("span+ .a-text-normal")
        image   = product.at_css(".s-access-image")
        url     = product.at_css(".a-row > a")

        puts "--" * 50
        puts "title: \t\t#{title.text}"
        puts "seller: \t#{node_text(seller)}"
        puts "price: \t\t#{price.text}"
        puts "stars: \t\t#{stars.text}"
        puts "reviews: \t#{node_text(reviews)}"
        puts "image url: \t#{node_attr(image, 'src')}"
        puts "product url: \t#{node_attr(url, 'href')}"
      end
    end

    # Text of +node+, or "Unknown" when the node was not found.
    def self.node_text(node)
      node ? node.text : UNKNOWN
    end

    # Attribute +name+ of +node+, or "Unknown" when node or attribute is missing.
    def self.node_attr(node, name)
      (node && node[name]) || UNKNOWN
    end

    private_class_method :node_text, :node_attr
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amazon-search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.10
|
4
|
+
version: 1.1.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
@@ -32,6 +32,8 @@ extensions: []
|
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
34
|
- Readme.rdoc
|
35
|
+
- amazon-search.gemspec
|
36
|
+
- amazon-search.rb
|
35
37
|
homepage: https://github.com/m8ss/amazon-search
|
36
38
|
licenses:
|
37
39
|
- MIT
|