amazon-search 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/amazon-search.rb +108 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ffd9ecb7e82f05973d09530f4f8c6d4a305f3870
|
4
|
+
data.tar.gz: 9c3f15bce32b1ba1c6ba76affb5d9bb39e7ea62f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 20c6d71d9d20ceb3bc72c3256409f32e395198b87edb7cc9ab0f522765035c91a76bee96fc2714187e969ce11e9b1df4a3845ab292a25e320a92084d1681708c
|
7
|
+
data.tar.gz: ca2aac4a30a8b66de94263c9abca6c46618816342384e50e02db5fc98a17c5726847cce8689b3cf688e39627f1d41992943881af4b0bac9636cf6149eef404d9
|
@@ -0,0 +1,108 @@
|
|
1
|
+
|
2
|
+
require 'mechanize'
|
3
|
+
|
4
|
+
###########################################
# Select Amazon search form and submit
# the search criteria
###########################################
agent = Mechanize.new
main_page = agent.get("http://amazon.com")
search_form = main_page.form_with :name => "site-search"

# Fail early with a readable message rather than a NoMethodError on nil below.
raise "Amazon search form 'site-search' not found" if search_form.nil?

# Returns the node's text, or the fallback when the selector matched nothing.
text_or = lambda { |node, fallback| node ? node.text : fallback }

# Returns the named attribute of the node, or the fallback when the node is missing.
attr_or = lambda { |node, name, fallback| node ? node[name] : fallback }

loop do # until user cancels or input ends
  puts "\nPlease enter keywords for Amazon search"
  input = gets # asks for search terms
  break if input.nil? # gets returns nil at end of input; stop instead of crashing on nil.chomp

  keywords = input.chomp
  search_form.field_with(:name => "field-keywords").value = keywords # sets value of search box
  search_results = agent.submit search_form # submits form

  ###########################################
  # Cycle through all result pages
  # and list product links
  ###########################################

  # last page is disabled nav button in search results
  last_page_num = search_results.search '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnDisabled", " " ))]'
  last_page_num = last_page_num.text.to_i # change to int for upcoming iteration instructions

  # When no disabled nav button is found, to_i yields 0; still list the page
  # that the search returned instead of printing nothing.
  last_page_num = 1 if last_page_num < 1

  # The submitted form's response already is the first results page, so it is
  # used directly rather than being requested again.
  page = search_results

  last_page_num.times do |page_index|
    #--------- display page number ---------------------
    # current_page = page.search '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnCur", " " ))]'
    # puts "\n", "=="*50
    # puts "Displaying '#{current_page.text}' of '#{last_page_num}' pages"
    # puts "This is the current page's uri:"
    # puts page.uri

    # find the element of each product
    # (this xpath works but also matches ads, which are filtered out below)
    product_divs = page.search('//li[starts-with(@id, "result")]')

    # nokogiri syntax is needed when iterating...not mechanize!
    product_divs.each do |product|
      #--------- nokogiri select html sections from css ---------------------
      title   = product.at_css(".s-access-title")
      seller  = product.at_css(".a-row > .a-spacing-none") # ".a-spacing-small .a-spacing-none"
      price   = product.at_css(".s-price")
      stars   = product.at_css(".a-icon-star")
      reviews = product.at_css("span+ .a-text-normal") # ".a-span-last .a-spacing-mini > span+ .a-text-normal"
      image   = product.at_css(".s-access-image")
      url     = product.at_css(".a-row > a")

      # Skip only this entry (not the rest of the page) when it lacks a title
      # (probably an ad), a price, or a star rating.
      next if title.nil? || price.nil? || stars.nil?

      puts "--"*50
      puts "title: \t\t#{title.text}"
      puts "seller: \t#{text_or.call(seller, "unknown")}" # no seller is prob a movie
      puts "price: \t\t#{price.text}"
      puts "stars: \t\t#{stars.text}"
      puts "reviews: \t#{text_or.call(reviews, "unknown")}"
      puts "image url: \t#{attr_or.call(image, 'src', "unknown")}"
      puts "product url: \t#{attr_or.call(url, 'href', "unknown")}"
    end # ends each product iteration (page is finished)

    break if page_index + 1 == last_page_num # already on the last page

    next_page_link = page.link_with text: /Next Page/ # find the next page link
    break if next_page_link.nil? # no further page to follow

    page = next_page_link.click # load the next page
  end

  puts "\n\n(end of search results)"
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: amazon-search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Mason
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-09-17 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A simple screenscraper to search Amazon
|
14
|
+
email: mace2345@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/amazon-search.rb
|
20
|
+
homepage: http://rubygems.org/gems/amazon-search
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.4.6
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Hola!
|
44
|
+
test_files: []
|