amazon-search 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/amazon-search.rb +108 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ffd9ecb7e82f05973d09530f4f8c6d4a305f3870
4
+ data.tar.gz: 9c3f15bce32b1ba1c6ba76affb5d9bb39e7ea62f
5
+ SHA512:
6
+ metadata.gz: 20c6d71d9d20ceb3bc72c3256409f32e395198b87edb7cc9ab0f522765035c91a76bee96fc2714187e969ce11e9b1df4a3845ab292a25e320a92084d1681708c
7
+ data.tar.gz: ca2aac4a30a8b66de94263c9abca6c46618816342384e50e02db5fc98a17c5726847cce8689b3cf688e39627f1d41992943881af4b0bac9636cf6149eef404d9
@@ -0,0 +1,108 @@
# Interactive Amazon search scraper.
#
# Loading this file starts a prompt loop on stdin/stdout. Each line entered is
# submitted through the "site-search" form fetched from amazon.com; for every
# result page the title, seller, price, star rating, review count, image URL
# and product URL of each listing are printed. The loop ends when stdin is
# closed or the process is interrupted.
#
# NOTE(review): every XPath/CSS selector below depends on Amazon's markup and
# is taken over unchanged from the previous revision -- confirm against the
# live site before relying on it.

require 'mechanize'

###########################################
# Select Amazon search form and submit
# the search criteria
###########################################
agent       = Mechanize.new
main_page   = agent.get("http://amazon.com")
search_form = main_page.form_with :name => "site-search"
raise "search form 'site-search' not found on #{main_page.uri}" unless search_form

# The keyword input is looked up once; the same form is re-submitted for
# every search.
keyword_field = search_form.field_with(:name => "field-keywords")
raise "input 'field-keywords' not found in the search form" unless keyword_field

# Elements carrying the "pagnDisabled" class; the text of the match is read as
# the number of the last result page.
last_page_xpath = '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnDisabled", " " ))]'

# One <li id="result..."> per listing. This also matches ads, which are
# filtered out below by their missing title.
product_xpath = '//li[starts-with(@id, "result")]'

# The previous revision never followed "Next Page" beyond page 20; the limit
# is kept as an upper bound. Presumably this mirrors a limit on Amazon's side
# -- TODO confirm.
page_cap = 20

# Nil-tolerant accessors: a listing that lacks an optional element is printed
# with the fallback instead of raising NoMethodError.
text_of = lambda { |node, fallback| node ? node.text : fallback }
attr_of = lambda { |node, name, fallback| node ? node[name] : fallback }

loop do # until the user cancels or stdin is closed
  puts "\nPlease enter keywords for Amazon search"
  input = gets
  break if input.nil? # gets returns nil at end of input

  keyword_field.value = input.chomp     # sets value of search box
  page = agent.submit search_form       # first page of results

  ###########################################
  # Cycle through all result pages
  # and list product links
  ###########################################

  # String#to_i yields 0 when no page number is found; the first page is
  # still shown in that case.
  last_page_num = page.search(last_page_xpath).text.to_i
  total_pages   = [[last_page_num, 1].max, page_cap].min

  total_pages.times do |index|
    # nokogiri syntax is needed when iterating...not mechanize!
    page.search(product_xpath).each do |product|
      #--------- nokogiri select html sections from css ---------------------
      title   = product.at_css(".s-access-title")
      seller  = product.at_css(".a-row > .a-spacing-none")
      price   = product.at_css(".s-price")
      stars   = product.at_css(".a-icon-star")
      reviews = product.at_css("span+ .a-text-normal")
      image   = product.at_css(".s-access-image")
      url     = product.at_css(".a-row > a")

      # No title: probably an ad. No price or rating: not a listing worth
      # showing. Skip only this entry, not the remainder of the page.
      next if title.nil? || price.nil? || stars.nil?

      puts "--"*50
      puts "title: \t\t#{title.text}"
      puts "seller: \t#{text_of.call(seller, "unknown")}" # nil seller: prob a movie
      puts "price: \t\t#{price.text}"
      puts "stars: \t\t#{stars.text}"
      puts "reviews: \t#{text_of.call(reviews, "unknown")}"
      puts "image url: \t#{attr_of.call(image, 'src', "unknown")}"
      puts "product url: \t#{attr_of.call(url, 'href', "unknown")}"
    end

    break if index + 1 == total_pages # last page: nothing left to fetch

    next_page_link = page.link_with text: /Next Page/ # find the next page link
    break if next_page_link.nil?                      # no further page offered

    # The clicked page is used directly; it is not requested a second time.
    page = next_page_link.click
  end

  puts "\n\n(end of search results)"
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: amazon-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - John Mason
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A simple screenscraper to search Amazon
14
+ email: mace2345@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/amazon-search.rb
20
+ homepage: http://rubygems.org/gems/amazon-search
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.6
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Hola!
44
+ test_files: []