craigslister 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/craigslister.rb +51 -28
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: baee98976e89041bd66fa0185d57da5c795d4528
4
- data.tar.gz: b433e97471302f67371aa8e8156f702730eff626
3
+ metadata.gz: 6e1d5d4462a643f2ffcc73f70e2fe18c26f2cd05
4
+ data.tar.gz: 784647815b81ed5fcb46cf717c57b3fe85a29058
5
5
  SHA512:
6
- metadata.gz: d0fcd65f9746e6051c4429cab01b80b1e62f847a6c4cc68f6c12d7a2a375101db21ffe83bf26146719e6b17373a25324f03bfa3ca1d8b60cfc95aa3da76cc234
7
- data.tar.gz: 74c642fe4c6344ca693a4de612851ecb97f1612e91b954c9dde1057e3e70e7e2b5789a3cae27b6a399ad98b11a4b8e95672d71339b15fcf0247f4b2b4c4797a7
6
+ metadata.gz: 07101df2df31e3acf6b4d5b14d44c4d9ad00cf13b72d0d5ee6e3816a1f57cc2236ef8d501f6b380956b7d39c8c0c1eb345caf65735bf08e621a00bd7069798c0
7
+ data.tar.gz: 2db66d6ac5ccf8de5f25d6393b0d721f36307e2d008717de5dba6d2849799e75322089dda412347ae41ed2bea9f80c18a352d5b00358b8937be00a2c480c03dc
data/lib/craigslister.rb CHANGED
@@ -1,14 +1,15 @@
1
1
  require 'nokogiri'
2
2
  require 'open-uri'
3
3
 
4
+
4
5
  class InvalidRangeError < StandardError
5
6
  end
6
7
 
8
+
7
9
  class Craigslister
8
- attr_reader :area, :item, :high, :low, :results
10
+ attr_reader :area, :item, :high, :low
9
11
 
10
12
  def initialize args
11
- @results = []
12
13
  @area = args.fetch(:area, 'sfbay')
13
14
  @item = args[:item]
14
15
  @high = args.fetch(:high, nil)
@@ -17,23 +18,17 @@ class Craigslister
17
18
  end
18
19
 
19
20
  def scrape!
20
- links.each_with_index {|link, index| get_data_from(link, index)}
21
- results
21
+ links.map {|link| item_from(link)}.compact
22
22
  end
23
23
 
24
- def url
25
- "#{base_url}/"\
26
- "search/sss?sort=rel&"\
27
- "#{price_query}"\
28
- "query=#{item.downcase.split(' ') * '+'}"
24
+ def links
25
+ page_from(url).css('.hdrlnk').map {|link| format_link(link)}
29
26
  end
30
27
 
31
- def links
32
- page = Nokogiri::HTML(open(url))
33
- page.css('.hdrlnk').map do |link|
34
- # formats out of town links, otherwise use base_url + link
35
- link['href'] =~ /\w+\.craig/ ? "https:" + link['href'] : base_url + link['href']
36
- end
28
+ def url
29
+ "#{base_url}/search/sss?sort=rel&"\
30
+ "#{price_query}query="\
31
+ "#{item.downcase.split(' ') * '+'}"
37
32
  end
38
33
 
39
34
 
@@ -42,20 +37,12 @@ class Craigslister
42
37
  "https://#{area}.craigslist.org"
43
38
  end
44
39
 
45
- def get_data_from link, index
46
- page = Nokogiri::HTML(open(link))
47
- @results << Item.new(scrape_item_data(page, link)) rescue puts "No image for post ##{index+1}"
40
+ def page_from url
41
+ Nokogiri::HTML(open(url))
48
42
  end
49
43
 
50
- def scrape_item_data page, url
51
- {
52
- image: page.at('img')['src'],
53
- title: page.at('span.postingtitletext').text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
54
- price: page.at('span.postingtitletext span.price').text.gsub(/\$/,'').to_i,
55
- location: page.at('span.postingtitletext small').text.gsub(/ ?[\(\)]/,''),
56
- description: page.at('section#postingbody').text,
57
- url: url
58
- }
44
+ def format_link link
45
+ link['href'] =~ /\w+\.craig/ ? "https:" + link['href'] : base_url + link['href']
59
46
  end
60
47
 
61
48
  def price_query
@@ -68,8 +55,45 @@ class Craigslister
68
55
  def validate_price_range
69
56
  raise InvalidRangeError if low && high && low > high
70
57
  end
58
+
59
+ def item_from link
60
+ Item.new(get_item_data(page_from(link), link))
61
+ end
62
+
63
+ def get_item_data page, link
64
+ {
65
+ image: scrape_image(page),
66
+ title: page.at('span.postingtitletext').text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
67
+ price: scrape_price(page),
68
+ location: scrape_location(page),
69
+ description: page.at('section#postingbody').text,
70
+ url: link
71
+ }
72
+ end
73
+
74
+ def scrape_image page
75
+ page.at('img') ? page.at('img')['src'] : ""
76
+ end
77
+
78
+ def scrape_price page
79
+ if price = page.at('span.postingtitletext span.price')
80
+ price.text.gsub(/\$/,'').to_i
81
+ else
82
+ 0
83
+ end
84
+ end
85
+
86
+ def scrape_location page
87
+ if location = page.at('span.postingtitletext small')
88
+ location.text.gsub(/ ?[\(\)]/,'')
89
+ else
90
+ ""
91
+ end
92
+ end
71
93
  end
72
94
 
95
+
96
+
73
97
  class Item
74
98
  attr_reader :title, :image, :price, :location, :url
75
99
 
@@ -81,4 +105,3 @@ class Item
81
105
  @url = args[:url]
82
106
  end
83
107
  end
84
-
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: craigslister
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Scott
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-27 00:00:00.000000000 Z
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -37,7 +37,7 @@ extensions: []
37
37
  extra_rdoc_files: []
38
38
  files:
39
39
  - lib/craigslister.rb
40
- homepage: http://rubygems.org/gems/craigslister
40
+ homepage: https://github.com/Yago580/craigslister
41
41
  licenses:
42
42
  - MIT
43
43
  metadata: {}