amazon-search 1.1.10 → 1.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Readme.rdoc +1 -1
- data/amazon-search.gemspec +25 -0
- data/amazon-search.rb +93 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c35d7c9c08987d534c8430d25e91206c9ba98571
|
4
|
+
data.tar.gz: 7d26df4b2233e9b6bd47a521eb50be6977eb62b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec092cf9867ff4bda6a324936cbf557a6e0cbee74ccd2d96780ad773c092f9e130d927e341bf049c8a6f0fe4f5e996f877f6a9652554db62bad39f8a5f95d32e
|
7
|
+
data.tar.gz: f0396b0589455cdc8615f485f557192957ccbe9e7bb0002a22b47e2c7fffd708cd390dfd01b87ffe7a88f54de0df10da816b0069271edee1f60d34cdf110ef93
|
data/Readme.rdoc
CHANGED
data/amazon-search.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Gem specification for amazon-search: package identity, the files to ship,
# descriptive metadata and runtime dependencies.
Gem::Specification.new do |gem|
  gem.name        = %q{amazon-search}
  gem.version     = '1.1.11'
  gem.date        = '2015-09-18'
  gem.platform    = Gem::Platform::RUBY
  # amazon-search.rb uses the `key: value` hash literal syntax, which Ruby 1.8
  # cannot parse, so 1.9 is the real minimum.
  gem.required_ruby_version = '>= 1.9'

  # The packaged file list is whatever git tracks at build time, so the gem
  # must be built from inside a git checkout.
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- test/*`.split("\n")
  # amazon-search.rb is shipped at the gem root rather than under lib/ (the
  # default require path), so the root has to be on the load path for
  # `require 'amazon-search'` to resolve.
  gem.require_paths = ['.']

  gem.summary     = "A simple screenscraper to search Amazon"
  gem.description = "Simple screenscraper to search Amazon and return product titles, urls, image href, etc."
  gem.authors     = ["John Mason"]
  gem.email       = 'mace2345@gmail.com'
  gem.homepage    = 'https://github.com/m8ss/amazon-search'
  gem.license     = 'MIT'

  gem.add_runtime_dependency('mechanize', '~> 2.7')

end
|
23
|
+
|
24
|
+
|
25
|
+
|
data/amazon-search.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'mechanize'
|
4
|
+
|
5
|
+
module Amazon

  # Screen-scrapes Amazon search results with Mechanize and prints a summary
  # of each product found.
  #
  # Call Search.find_products(keywords) first: it stores the scraped result
  # nodes on the class. Then call Search.display_results to print them.
  class Search
    # XPath for elements whose class list contains "pagnDisabled"; the text of
    # the match is read as the number of the last results page.
    LAST_PAGE_XPATH = '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnDisabled", " " ))]'.freeze

    # XPath for the <li> elements whose id starts with "result" (one per product).
    RESULT_XPATH = '//li[starts-with(@id, "result")]'.freeze

    class << self
      # Flat list of product nodes collected by the most recent call to
      # find_products (nil until find_products has been called).
      attr_reader :product_divs
    end

    # Submits +keywords+ through the Amazon search form, walks the result
    # pages and stores every product node in Search.product_divs.
    #
    # keywords - the search string typed into the "field-keywords" box.
    #
    # Prints "(end of search results)" when done and returns nil.
    def self.find_products(keywords)
      #--------- submit the search form with keywords ---------------------
      agent       = Mechanize.new
      main_page   = agent.get("http://amazon.com")
      search_form = main_page.form_with :name => "site-search"

      search_form.field_with(:name => "field-keywords").value = keywords
      page = agent.submit search_form # the response is already the first results page

      #--------- scan each page and store the results ---------------------
      @product_divs = []

      last_page_num = page.search(LAST_PAGE_XPATH).text.to_i
      # No pager on the page yields 0; the first page still has to be scanned.
      last_page_num = 1 if last_page_num < 1

      1.upto(last_page_num) do |page_num|
        # Store the individual nodes, not the NodeSet, so that each product is
        # inspected on its own by display_results.
        @product_divs.concat(page.search(RESULT_XPATH).to_a)

        break if page_num == last_page_num

        next_page_link = page.link_with :text => /Next Page/
        break if next_page_link.nil? # pager ended earlier than announced

        page = next_page_link.click # clicking already loads the next page
      end

      puts "\n\n(end of search results)"
    end

    # Prints title, seller, price, stars, reviews, image url and product url
    # for every stored product node.
    #
    # Entries without a title (probably an ad), price or star rating are
    # skipped. A missing seller or review count is printed as "Unknown"; a
    # missing image or link is printed as an empty value.
    def self.display_results
      (product_divs || []).each do |product|
        #--------- nokogiri select html sections from css ---------------------
        title = product.at_css(".s-access-title")
        price = product.at_css(".s-price")
        stars = product.at_css(".a-icon-star")

        #--------- avoid the related items gotchas ---------------------
        # Skip only this entry; the rest of the list is still worth printing.
        next if title.nil? || price.nil? || stars.nil?

        seller  = product.at_css(".a-row > .a-spacing-none")
        reviews = product.at_css("span+ .a-text-normal")
        image   = product.at_css(".s-access-image")
        url     = product.at_css(".a-row > a")

        puts "--"*50
        puts "title: \t\t#{title.text}"
        puts "seller: \t#{text_or_unknown(seller)}"
        puts "price: \t\t#{price.text}"
        puts "stars: \t\t#{stars.text}"
        puts "reviews: \t#{text_or_unknown(reviews)}"
        puts "image url: \t#{image && image['src']}"
        puts "product url: \t#{url && url['href']}"
      end
    end

    # Returns the text of +node+, or "Unknown" when the node was not found.
    def self.text_or_unknown(node)
      node.nil? ? "Unknown" : node.text
    end
    private_class_method :text_or_unknown
  end # ends Search Class
end # ends Amazon Module
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amazon-search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.10
|
4
|
+
version: 1.1.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
@@ -32,6 +32,8 @@ extensions: []
|
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
34
|
- Readme.rdoc
|
35
|
+
- amazon-search.gemspec
|
36
|
+
- amazon-search.rb
|
35
37
|
homepage: https://github.com/m8ss/amazon-search
|
36
38
|
licenses:
|
37
39
|
- MIT
|