amazon-search 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/amazon-search.rb +108 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ffd9ecb7e82f05973d09530f4f8c6d4a305f3870
4
+ data.tar.gz: 9c3f15bce32b1ba1c6ba76affb5d9bb39e7ea62f
5
+ SHA512:
6
+ metadata.gz: 20c6d71d9d20ceb3bc72c3256409f32e395198b87edb7cc9ab0f522765035c91a76bee96fc2714187e969ce11e9b1df4a3845ab292a25e320a92084d1681708c
7
+ data.tar.gz: ca2aac4a30a8b66de94263c9abca6c46618816342384e50e02db5fc98a17c5726847cce8689b3cf688e39627f1d41992943881af4b0bac9636cf6149eef404d9
@@ -0,0 +1,108 @@
require 'mechanize'

###########################################
# amazon-search
#
# Interactive screen scraper. Loading this file fetches the Amazon home
# page, then repeatedly asks on standard input for search keywords,
# submits them through the site's search form and prints the products
# listed on each page of results. It keeps prompting until input ends
# or the process is interrupted.
###########################################
module AmazonSearch
  # Upper bound on the number of result pages walked for one search.
  MAX_RESULT_PAGES = 20

  # Printed in place of an optional detail that a listing does not carry.
  UNKNOWN = "unknown"

  # Disabled pagination entry; its text is read as the last page number.
  LAST_PAGE_XPATH = '//*[contains(concat( " ", @class, " " ), concat( " ", "pagnDisabled", " " ))]'

  # One <li> per search result. This selector also matches ads, which are
  # filtered out later because they lack a title element.
  PRODUCT_XPATH = '//li[starts-with(@id, "result")]'

  module_function

  # Entry point: opens the home page once, then runs one search per line
  # of keywords read from input.
  #
  # Raises RuntimeError when the home page has no form named "site-search".
  def run
    agent = Mechanize.new
    main_page = agent.get("http://amazon.com")
    search_form = main_page.form_with :name => "site-search"
    raise "search form 'site-search' not found on #{main_page.uri}" unless search_form

    loop do # until user cancels
      puts "\nPlease enter keywords for Amazon search"
      line = gets
      break unless line # gets returns nil at end of input; stop instead of crashing on nil.chomp

      run_search(agent, search_form, line.chomp)
    end
  end

  # Submits +keywords+ through +search_form+ and prints every result page,
  # following the "Next Page" link between pages.
  def run_search(agent, search_form, keywords)
    search_form.field_with(:name => "field-keywords").value = keywords # sets value of search box
    page = agent.submit search_form # the submitted form's response is the first result page

    total_pages = page_count(page)
    1.upto(total_pages) do |page_number|
      print_results(page)
      break if page_number == total_pages # nothing left to fetch

      next_page_link = page.link_with :text => /Next Page/
      break unless next_page_link # no link to follow: stop instead of calling click on nil

      page = next_page_link.click
    end

    puts "\n\n(end of search results)"
  end

  # Returns how many result pages to walk, always between 1 and
  # MAX_RESULT_PAGES. When the disabled pagination entry is absent its
  # text converts to 0, so the count is raised to 1 and the page already
  # loaded is still shown.
  def page_count(results_page)
    last_page_num = results_page.search(LAST_PAGE_XPATH).text.to_i
    [[last_page_num, 1].max, MAX_RESULT_PAGES].min
  end

  # Prints every usable product found on +page+.
  def print_results(page)
    # Mechanize hands back Nokogiri nodes, so each product is queried with
    # Nokogiri's own methods.
    page.search(PRODUCT_XPATH).each do |product|
      details = product_details(product)
      print_product(details) if details # skip this entry only, keep the rest of the page
    end
  end

  # Extracts the displayed fields from one product node.
  #
  # Returns a Hash of Strings, or nil when the node has no title
  # (probably an ad), no price or no star rating. Seller, review count,
  # image and link are optional and fall back to UNKNOWN.
  def product_details(product)
    title = product.at_css(".s-access-title")
    price = product.at_css(".s-price")
    stars = product.at_css(".a-icon-star")
    return nil unless title && price && stars

    seller  = product.at_css(".a-row > .a-spacing-none")
    reviews = product.at_css("span+ .a-text-normal")
    image   = product.at_css(".s-access-image")
    link    = product.at_css(".a-row > a")

    {
      :title   => title.text,
      :seller  => seller ? seller.text : UNKNOWN, # a missing seller is probably a movie
      :price   => price.text,
      :stars   => stars.text,
      :reviews => reviews ? reviews.text : UNKNOWN,
      :image   => (image && image['src']) || UNKNOWN,
      :url     => (link && link['href']) || UNKNOWN
    }
  end

  # Writes one product to standard output in the script's report format.
  def print_product(details)
    puts "--"*50
    puts "title: \t\t#{details[:title]}"
    puts "seller: \t#{details[:seller]}"
    puts "price: \t\t#{details[:price]}"
    puts "stars: \t\t#{details[:stars]}"
    puts "reviews: \t#{details[:reviews]}"
    puts "image url: \t#{details[:image]}"
    puts "product url: \t#{details[:url]}"
  end
end

# Loading the file starts the interactive session, as the original script did.
AmazonSearch.run
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: amazon-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - John Mason
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A simple screenscraper to search Amazon
14
+ email: mace2345@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/amazon-search.rb
20
+ homepage: http://rubygems.org/gems/amazon-search
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.6
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Hola!
44
+ test_files: []