amazon-search 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/amazon-search.rb +108 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ffd9ecb7e82f05973d09530f4f8c6d4a305f3870
|
4
|
+
data.tar.gz: 9c3f15bce32b1ba1c6ba76affb5d9bb39e7ea62f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 20c6d71d9d20ceb3bc72c3256409f32e395198b87edb7cc9ab0f522765035c91a76bee96fc2714187e969ce11e9b1df4a3845ab292a25e320a92084d1681708c
|
7
|
+
data.tar.gz: ca2aac4a30a8b66de94263c9abca6c46618816342384e50e02db5fc98a17c5726847cce8689b3cf688e39627f1d41992943881af4b0bac9636cf6149eef404d9
|
@@ -0,0 +1,108 @@
|
|
1
|
+
|
2
|
+
require 'mechanize'
|
3
|
+
|
4
|
+
###########################################
# Interactive Amazon search.
#
# Loads the Amazon home page, then repeatedly asks for keywords on
# standard input, submits them through the site's search form and
# prints the products found on each result page.
# Runs until input ends (EOF) or the process is interrupted.
###########################################
agent = Mechanize.new
main_page = agent.get("http://amazon.com")

search_form = main_page.form_with :name => "site-search"
raise "Amazon search form 'site-search' not found" if search_form.nil?

# The search box never changes between searches, so look it up once.
keyword_field = search_form.field_with(:name => "field-keywords")
raise "Amazon search field 'field-keywords' not found" if keyword_field.nil?

# Text of a node, or nil when the selector matched nothing.
node_text = lambda { |node| node ? node.text : nil }
# Attribute of a node, or nil when the selector matched nothing.
node_attr = lambda { |node, name| node ? node[name] : nil }

loop do # until input ends or the user cancels
  puts "\nPlease enter keywords for Amazon search"
  input = gets
  break if input.nil? # EOF: there is nothing left to search for

  keyword_field.value = input.chomp # sets value of search box
  page = agent.submit search_form   # submitted form returns the first result page

  ###########################################
  # Cycle through all result pages
  # and list product links
  ###########################################

  # The disabled nav button in the search results holds the last page number.
  # It is missing when all results fit on one page (text "" -> 0), so always
  # process at least the page we already have.
  last_page_nav = page.search '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnDisabled", " " ))]'
  last_page_num = [last_page_nav.text.to_i, 1].max

  1.upto(last_page_num) do |page_num|
    # This XPath also matches ads; those are filtered out below.
    product_items = page.search('//li[starts-with(@id, "result")]')

    # nokogiri syntax is needed when iterating...not mechanize!
    product_items.each do |product|
      #--------- nokogiri select html sections from css ---------------------
      title_node = product.at_css(".s-access-title")
      price_node = product.at_css(".s-price")
      stars_node = product.at_css(".a-icon-star")

      # No title: probably an ad. No price or stars: not a worthy item.
      # Skip only this entry; the rest of the page is still listed.
      next if title_node.nil? || price_node.nil? || stars_node.nil?

      title   = title_node.text
      # a missing seller is probably a movie; still list the product
      seller  = node_text.call(product.at_css(".a-row > .a-spacing-none")) || "unknown"
      price   = price_node.text
      stars   = stars_node.text
      reviews = node_text.call(product.at_css("span+ .a-text-normal"))
      image   = node_attr.call(product.at_css(".s-access-image"), 'src')
      url     = node_attr.call(product.at_css(".a-row > a"), 'href')

      puts "--"*50
      puts "title: \t\t#{title}"
      puts "seller: \t#{seller}"
      puts "price: \t\t#{price}"
      puts "stars: \t\t#{stars}"
      puts "reviews: \t#{reviews}"
      puts "image url: \t#{image}"
      puts "product url: \t#{url}"
    end # ends each product iteration (page is finished)

    break if page_num == last_page_num # last page: nothing more to load

    next_page_link = page.link_with text: /Next Page/ # find the next page link
    break if next_page_link.nil? # pagination ended earlier than announced

    page = next_page_link.click # the clicked link returns the next page
  end

  puts "\n\n(end of search results)"
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: amazon-search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Mason
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-09-17 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A simple screenscraper to search Amazon
|
14
|
+
email: mace2345@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/amazon-search.rb
|
20
|
+
homepage: http://rubygems.org/gems/amazon-search
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.4.6
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Hola!
|
44
|
+
test_files: []
|