craigslister 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/craigslister.rb +87 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2f7c43f5d674ae551b03f42dc93f75576890281a
4
+ data.tar.gz: ad9657c211afc293e8691c63a27dc2c955761e4e
5
+ SHA512:
6
+ metadata.gz: 3485dfec92f4f3b9b866fc02d003a656606c6450aef079e4f8e65864a83671168b458ac2a4236affecc67eecdb3bd50a6c43c1b09c0a918f5aff4da513ff4f83
7
+ data.tar.gz: 34d344fd148d581997485196755c1df024a6dca845152979cf3320addfbf46b096786c805d016b2c7f5eb7242b338750e5b3d19535db14ffdb3404084ca15167
@@ -0,0 +1,87 @@
1
+ require 'mechanize'
2
+
3
# Raised when a search is configured with a minimum price that is greater
# than its maximum price.
class InvalidRangeError < StandardError
  # @param msg [String] description of the invalid range; a generic
  #   explanation is used when the error is raised without a message
  def initialize(msg = 'minimum price must not be greater than maximum price')
    super
  end
end
5
+
6
require 'uri'

# Searches the for-sale section of a Craigslist area site with Mechanize and
# collects the matching postings as Item objects.
#
# Example:
#   scraper = Craigslister.new(item: 'road bike', low: 100, high: 500)
#   scraper.scrape
#   scraper.results.first.title
class Craigslister
  attr_reader :area, :item, :high, :low, :results

  # @param args [Hash] search options
  #   :item - search phrase; nil is treated as an empty phrase
  #   :area - Craigslist subdomain, 'sfbay' when omitted
  #   :low  - minimum price, optional
  #   :high - maximum price, optional
  # @raise [InvalidRangeError] when both prices are given and :low > :high
  def initialize(args = {})
    @results = []
    @area = args.fetch(:area, 'sfbay')
    @item = args[:item]
    @high = args[:high]
    @low = args[:low]
    validate_price_range

    @mech = Mechanize.new
    configure_mech
  end

  # Visits every posting returned by the search and appends one Item per
  # complete posting to #results.
  #
  # @return [Array<String>] the posting links that were visited
  def scrape
    links.each { |link| get_data_from(link) }
  end

  # @return [String] the search URL built from area, price range and item
  def url
    "#{base_url}search/sss?sort=rel&#{price_query}query=#{encoded_query}"
  end

  # Fetches the search page and extracts the posting links from it.
  #
  # @return [Array<String>] href of every '.hdrlnk' element on the page
  def links
    @mech.get(url)
    @mech.page.search('.hdrlnk').map { |anchor| anchor['href'] }
  end

  private

  def base_url
    "https://#{area}.craigslist.org/"
  end

  # Lower-cases the phrase, collapses whitespace and joins the words with
  # '+'. Each word is percent-encoded so characters such as '&' or '#'
  # cannot break out of the query parameter.
  def encoded_query
    item.to_s.downcase.split.map { |word| URI.encode_www_form_component(word) }.join('+')
  end

  # Loads one posting and stores it as an Item. Postings that lack any of
  # the scraped elements are skipped with a warning instead of being hidden
  # behind a blanket rescue.
  def get_data_from(link)
    @mech.get(link)
    data = scrape_item_data
    if data
      @results << Item.new(data)
    else
      warn "craigslister: skipped #{link} (missing image, title, price, location or description)"
    end
  end

  # Reads the posting currently loaded in @mech.
  #
  # @return [Hash, nil] the attributes for Item.new, or nil when the page is
  #   not an HTML page or any of the expected elements is absent
  def scrape_item_data
    page = @mech.page
    # Non-HTML responses do not expose images; treat them as incomplete.
    return nil unless page.respond_to?(:images)

    picture = page.images.first
    heading = page.at('span.postingtitletext')
    price_tag = page.at('span.postingtitletext span.price')
    place = page.at('span.postingtitletext small')
    body = page.at('section#postingbody')
    return nil unless [picture, heading, price_tag, place, body].all?

    {
      image: picture.src,
      # Strip the trailing " - $123 (location)" suffix from the title.
      title: heading.text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
      price: price_tag.text.gsub(/\$/, '').to_i,
      # Drop the parentheses that surround the location.
      location: place.text.gsub(/ ?[\(\)]/, ''),
      description: body.text
    }
  end

  # @return [String] "min_price=..&" and/or "max_price=..&", or '' when no
  #   price bound was given
  def price_query
    query = ''
    query += "min_price=#{low}&" if low
    query += "max_price=#{high}&" if high
    query
  end

  # Turns robots.txt handling off and sets the 'Mac Safari' user agent alias.
  def configure_mech
    @mech.robots = false
    @mech.user_agent_alias = 'Mac Safari'
  end

  def validate_price_range
    return unless low && high && low > high

    raise InvalidRangeError, "low price (#{low}) must not exceed high price (#{high})"
  end
end
76
+
77
# Plain value holder for one scraped posting.
class Item
  attr_reader :title, :image, :price, :location, :description

  # @param args [Hash] posting attributes; each of :title, :image, :price,
  #   :location and :description is stored as given and is nil when absent
  def initialize(args)
    @title = args[:title]
    @image = args[:image]
    @price = args[:price]
    @location = args[:location]
    # Previously discarded even though the scraper supplies it.
    @description = args[:description]
  end
end
87
+
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: craigslister
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Chris Scott
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-26 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: all you need is an item title and you can scrape item objects from craigslist
14
+ email: christo247@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/craigslister.rb
20
+ homepage: http://rubygems.org/gems/craigslister
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.5
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Scrape Craigslist!!!!! heh heh heh
44
+ test_files: []