amazon-search 0.0.0 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/amazon-search.rb +41 -34
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7fd48ebf536715f9bf47f41b21d44717288858d3
|
4
|
+
data.tar.gz: 16c46d799f36417c1ca8d9a7b23d5264718fbdd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45be5d79564aa76d69955eb1ab60e5ff045e486fcdadd8cd1e738ba79742a91f5fedcab6bc2b8a940a3e0405f8712290b7bb0be2e6435589ad898d738436ce40
|
7
|
+
data.tar.gz: 80b9408721de51f964fdbfb082d4c29d9adccb5ea4785fc6e5b640a6d7a2f4eb97d8df8f378f4e7657fd59cf806442dca8f888b81aa86d8f244084b187b3ef6c
|
data/lib/amazon-search.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
1
2
|
|
2
3
|
require 'mechanize'
|
3
4
|
|
@@ -5,46 +6,43 @@ require 'mechanize'
|
|
5
6
|
# Select Amazon search form and submit
|
6
7
|
# the search criteria
|
7
8
|
###########################################
|
9
|
+
|
8
10
|
agent = Mechanize.new
|
9
11
|
main_page = agent.get("http://amazon.com")
|
10
12
|
search_form = main_page.form_with :name => "site-search"
|
11
13
|
|
12
|
-
|
13
|
-
|
14
|
-
|
14
|
+
keywords = ARGV[0] # asks for search terms
|
15
|
+
|
16
|
+
if ARGV[0] # did user enter in search keywords?
|
15
17
|
search_form.field_with(:name => "field-keywords").value = keywords # sets value of search box
|
16
18
|
search_results = agent.submit search_form # submits form
|
17
|
-
|
18
|
-
|
19
|
+
|
19
20
|
###########################################
|
20
21
|
# Cycle through all result pages
|
21
22
|
# and list product links
|
22
23
|
###########################################
|
23
|
-
|
24
|
-
|
24
|
+
|
25
25
|
next_page = agent.get(search_results.uri) # initial search results are the first page
|
26
26
|
|
27
27
|
# last page is disabled nav button in search results
|
28
28
|
last_page_num = search_results.search '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnDisabled", " " ))]'
|
29
29
|
last_page_num = last_page_num.text.to_i # change to int for upcoming iteration instructions
|
30
|
-
|
30
|
+
|
31
31
|
count = 0 # start count variable
|
32
|
-
|
32
|
+
|
33
33
|
last_page_num.times do # loop forever until stopped
|
34
34
|
count += 1
|
35
|
-
|
35
|
+
|
36
36
|
page = agent.get(next_page.uri) # load the next page
|
37
37
|
|
38
38
|
#--------- display page number ---------------------
|
39
39
|
# current_page = page.search '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnCur", " " ))]'
|
40
|
-
# puts "\n", "=="*50
|
41
|
-
# puts "Displaying '#{current_page.text}' of '20' pages"
|
42
|
-
# puts "This is the current page's uri:"
|
43
|
-
# puts page.uri
|
44
|
-
|
40
|
+
# STDOUT.puts "\n", "=="*50
|
41
|
+
# STDOUT.puts "Displaying '#{current_page.text}' of '20' pages"
|
42
|
+
# STDOUT.puts "This is the current page's uri:"
|
43
|
+
# STDOUT.puts page.uri
|
45
44
|
|
46
45
|
product_divs = page.search('//li[starts-with(@id, "result")]') # find the div of each product
|
47
|
-
# '//li[starts-with(@id, "result")]' <-- this works but includes ads...
|
48
46
|
|
49
47
|
# nokogiri syntax is needed when iterating...not mechanize!
|
50
48
|
product_divs.each do |product|
|
@@ -57,16 +55,14 @@ while true # until user cancels
|
|
57
55
|
reviews = product.at_css("span+ .a-text-normal") # ".a-span-last .a-spacing-mini > span+ .a-text-normal"
|
58
56
|
image = product.at_css(".s-access-image")
|
59
57
|
url = product.at_css(".a-row > a")
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
|
63
59
|
if title == nil # if it's nil it's prob an ad
|
64
60
|
break
|
65
61
|
else
|
66
62
|
title = title.text
|
67
63
|
|
68
|
-
if seller == nil # if seller is nil
|
69
|
-
seller = "
|
64
|
+
if seller == nil # if seller is nil put unknown
|
65
|
+
seller = "Unknown"
|
70
66
|
else
|
71
67
|
seller = seller.text
|
72
68
|
if price == nil # no price? prob not worthy item
|
@@ -83,14 +79,14 @@ while true # until user cancels
|
|
83
79
|
image = image['src']
|
84
80
|
url = url['href']
|
85
81
|
|
86
|
-
puts "--"*50
|
87
|
-
puts "title: \t\t#{title}"
|
88
|
-
puts "seller: \t#{seller}"
|
89
|
-
puts "price: \t\t#{price}"
|
90
|
-
puts "stars: \t\t#{stars}"
|
91
|
-
puts "reviews: \t#{reviews}"
|
92
|
-
puts "image url: \t#{image}"
|
93
|
-
puts "product url: \t#{url}"
|
82
|
+
STDOUT.puts "--"*50
|
83
|
+
STDOUT.puts "title: \t\t#{title}"
|
84
|
+
STDOUT.puts "seller: \t#{seller}"
|
85
|
+
STDOUT.puts "price: \t\t#{price}"
|
86
|
+
STDOUT.puts "stars: \t\t#{stars}"
|
87
|
+
STDOUT.puts "reviews: \t#{reviews}"
|
88
|
+
STDOUT.puts "image url: \t#{image}"
|
89
|
+
STDOUT.puts "product url: \t#{url}"
|
94
90
|
|
95
91
|
end # ends nil price if statement
|
96
92
|
end # ends nil stars if statement
|
@@ -98,11 +94,22 @@ while true # until user cancels
|
|
98
94
|
end # ends nil product if statement
|
99
95
|
end # ends each product div iteration (page is finished)
|
100
96
|
|
101
|
-
next_page_link = page.link_with text: /Next Page/ # find the next page link
|
102
|
-
|
97
|
+
next_page_link = page.link_with text: /Next Page/ # find the next page link
|
103
98
|
next_page = next_page_link.click unless count == 20 # click to next page unless on page 20
|
104
|
-
end
|
105
|
-
|
99
|
+
end # ends pagination loop
|
100
|
+
|
106
101
|
puts "\n\n(end of search results)"
|
107
102
|
|
108
|
-
|
103
|
+
else # user didn't enter search keywords
|
104
|
+
|
105
|
+
STDOUT.puts <<-EOF
|
106
|
+
Please provide search keywords
|
107
|
+
|
108
|
+
Example Usage:
|
109
|
+
amazon-search watches
|
110
|
+
amazon-search books
|
111
|
+
amazon-search games
|
112
|
+
|
113
|
+
EOF
|
114
|
+
|
115
|
+
end # ends ARGV if statement
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amazon-search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
@@ -9,15 +9,30 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2015-09-17 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
13
|
-
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.7.3
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.7.3
|
27
|
+
description: Simple screenscraper to search Amazon and return product titles, urls,
|
28
|
+
image href, etc.
|
14
29
|
email: mace2345@gmail.com
|
15
30
|
executables: []
|
16
31
|
extensions: []
|
17
32
|
extra_rdoc_files: []
|
18
33
|
files:
|
19
34
|
- lib/amazon-search.rb
|
20
|
-
homepage:
|
35
|
+
homepage: https://github.com/m8ss/amazon-search
|
21
36
|
licenses:
|
22
37
|
- MIT
|
23
38
|
metadata: {}
|
@@ -40,5 +55,5 @@ rubyforge_project:
|
|
40
55
|
rubygems_version: 2.4.6
|
41
56
|
signing_key:
|
42
57
|
specification_version: 4
|
43
|
-
summary:
|
58
|
+
summary: A simple screenscraper to search Amazon
|
44
59
|
test_files: []
|