amazon-search 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/amazon-search.rb +41 -34
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7fd48ebf536715f9bf47f41b21d44717288858d3
|
4
|
+
data.tar.gz: 16c46d799f36417c1ca8d9a7b23d5264718fbdd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45be5d79564aa76d69955eb1ab60e5ff045e486fcdadd8cd1e738ba79742a91f5fedcab6bc2b8a940a3e0405f8712290b7bb0be2e6435589ad898d738436ce40
|
7
|
+
data.tar.gz: 80b9408721de51f964fdbfb082d4c29d9adccb5ea4785fc6e5b640a6d7a2f4eb97d8df8f378f4e7657fd59cf806442dca8f888b81aa86d8f244084b187b3ef6c
|
data/lib/amazon-search.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
1
2
|
|
2
3
|
require 'mechanize'
|
3
4
|
|
@@ -5,46 +6,43 @@ require 'mechanize'
|
|
5
6
|
# Select Amazon search form and submit
|
6
7
|
# the search criteria
|
7
8
|
###########################################
|
9
|
+
|
8
10
|
agent = Mechanize.new
|
9
11
|
main_page = agent.get("http://amazon.com")
|
10
12
|
search_form = main_page.form_with :name => "site-search"
|
11
13
|
|
12
|
-
|
13
|
-
|
14
|
-
|
14
|
+
keywords = ARGV[0] # asks for search terms
|
15
|
+
|
16
|
+
if ARGV[0] # did user enter in search keywords?
|
15
17
|
search_form.field_with(:name => "field-keywords").value = keywords # sets value of search box
|
16
18
|
search_results = agent.submit search_form # submits form
|
17
|
-
|
18
|
-
|
19
|
+
|
19
20
|
###########################################
|
20
21
|
# Cycle through all result pages
|
21
22
|
# and list product links
|
22
23
|
###########################################
|
23
|
-
|
24
|
-
|
24
|
+
|
25
25
|
next_page = agent.get(search_results.uri) # initial search results are the first page
|
26
26
|
|
27
27
|
# last page is disabled nav button in search results
|
28
28
|
last_page_num = search_results.search '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnDisabled", " " ))]'
|
29
29
|
last_page_num = last_page_num.text.to_i # change to int for upcoming iteration instructions
|
30
|
-
|
30
|
+
|
31
31
|
count = 0 # start count variable
|
32
|
-
|
32
|
+
|
33
33
|
last_page_num.times do # loop forever until stopped
|
34
34
|
count += 1
|
35
|
-
|
35
|
+
|
36
36
|
page = agent.get(next_page.uri) # load the next page
|
37
37
|
|
38
38
|
#--------- display page number ---------------------
|
39
39
|
# current_page = page.search '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnCur", " " ))]'
|
40
|
-
# puts "\n", "=="*50
|
41
|
-
# puts "Displaying '#{current_page.text}' of '20' pages"
|
42
|
-
# puts "This is the current page's uri:"
|
43
|
-
# puts page.uri
|
44
|
-
|
40
|
+
# STDOUT.puts "\n", "=="*50
|
41
|
+
# STDOUT.puts "Displaying '#{current_page.text}' of '20' pages"
|
42
|
+
# STDOUT.puts "This is the current page's uri:"
|
43
|
+
# STDOUT.puts page.uri
|
45
44
|
|
46
45
|
product_divs = page.search('//li[starts-with(@id, "result")]') # find the div of each product
|
47
|
-
# '//li[starts-with(@id, "result")]' <-- this works but includes ads...
|
48
46
|
|
49
47
|
# nokogiri syntax is needed when iterating...not mechanize!
|
50
48
|
product_divs.each do |product|
|
@@ -57,16 +55,14 @@ while true # until user cancels
|
|
57
55
|
reviews = product.at_css("span+ .a-text-normal") # ".a-span-last .a-spacing-mini > span+ .a-text-normal"
|
58
56
|
image = product.at_css(".s-access-image")
|
59
57
|
url = product.at_css(".a-row > a")
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
|
63
59
|
if title == nil # if it's nil it's prob an ad
|
64
60
|
break
|
65
61
|
else
|
66
62
|
title = title.text
|
67
63
|
|
68
|
-
if seller == nil # if seller is nil
|
69
|
-
seller = "
|
64
|
+
if seller == nil # if seller is nil put unknown
|
65
|
+
seller = "Unknown"
|
70
66
|
else
|
71
67
|
seller = seller.text
|
72
68
|
if price == nil # no price? prob not worthy item
|
@@ -83,14 +79,14 @@ while true # until user cancels
|
|
83
79
|
image = image['src']
|
84
80
|
url = url['href']
|
85
81
|
|
86
|
-
puts "--"*50
|
87
|
-
puts "title: \t\t#{title}"
|
88
|
-
puts "seller: \t#{seller}"
|
89
|
-
puts "price: \t\t#{price}"
|
90
|
-
puts "stars: \t\t#{stars}"
|
91
|
-
puts "reviews: \t#{reviews}"
|
92
|
-
puts "image url: \t#{image}"
|
93
|
-
puts "product url: \t#{url}"
|
82
|
+
STDOUT.puts "--"*50
|
83
|
+
STDOUT.puts "title: \t\t#{title}"
|
84
|
+
STDOUT.puts "seller: \t#{seller}"
|
85
|
+
STDOUT.puts "price: \t\t#{price}"
|
86
|
+
STDOUT.puts "stars: \t\t#{stars}"
|
87
|
+
STDOUT.puts "reviews: \t#{reviews}"
|
88
|
+
STDOUT.puts "image url: \t#{image}"
|
89
|
+
STDOUT.puts "product url: \t#{url}"
|
94
90
|
|
95
91
|
end # ends nil price if statement
|
96
92
|
end # ends nil stars if statement
|
@@ -98,11 +94,22 @@ while true # until user cancels
|
|
98
94
|
end # ends nil product if statement
|
99
95
|
end # ends each product div iteration (page is finished)
|
100
96
|
|
101
|
-
next_page_link = page.link_with text: /Next Page/ # find the next page link
|
102
|
-
|
97
|
+
next_page_link = page.link_with text: /Next Page/ # find the next page link
|
103
98
|
next_page = next_page_link.click unless count == 20 # click to next page unless on page 20
|
104
|
-
end
|
105
|
-
|
99
|
+
end # ends pagination loop
|
100
|
+
|
106
101
|
puts "\n\n(end of search results)"
|
107
102
|
|
108
|
-
|
103
|
+
else # user didn't enter search keywords
|
104
|
+
|
105
|
+
STDOUT.puts <<-EOF
|
106
|
+
Please provide search keywords
|
107
|
+
|
108
|
+
Example Usage:
|
109
|
+
amazon-search watches
|
110
|
+
amazon-search books
|
111
|
+
amazon-search games
|
112
|
+
|
113
|
+
EOF
|
114
|
+
|
115
|
+
end # ends ARGV if statement
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amazon-search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
@@ -9,15 +9,30 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2015-09-17 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
13
|
-
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.7.3
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.7.3
|
27
|
+
description: Simple screenscraper to search Amazon and return product titles, urls,
|
28
|
+
image href, etc.
|
14
29
|
email: mace2345@gmail.com
|
15
30
|
executables: []
|
16
31
|
extensions: []
|
17
32
|
extra_rdoc_files: []
|
18
33
|
files:
|
19
34
|
- lib/amazon-search.rb
|
20
|
-
homepage:
|
35
|
+
homepage: https://github.com/m8ss/amazon-search
|
21
36
|
licenses:
|
22
37
|
- MIT
|
23
38
|
metadata: {}
|
@@ -40,5 +55,5 @@ rubyforge_project:
|
|
40
55
|
rubygems_version: 2.4.6
|
41
56
|
signing_key:
|
42
57
|
specification_version: 4
|
43
|
-
summary:
|
58
|
+
summary: A simple screenscraper to search Amazon
|
44
59
|
test_files: []
|