craigslister 0.0.1 → 0.1.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/craigslister.rb +26 -33
  3. metadata +22 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d851b7d0a7f00c9cb5493bb1b4ffdb38c1752d2e
4
- data.tar.gz: 80e986b0805484b1f8f8d33c8836547d9b05671b
3
+ metadata.gz: d677df8e37c7dd139665e441aa6581a34ad5b5ed
4
+ data.tar.gz: 93442d5a1e4fbbd4f9500f42f53e17322cda7d18
5
5
  SHA512:
6
- metadata.gz: ee0e39625bde28bfb1d5003804213f1d3ed42c75a2303dcf691a0551d350979167641210768e54ec1e2ad5e77b0c909d9c63e0dd9663d7782c1e9037ddf17520
7
- data.tar.gz: 123d6828752eeedd630360760300541c695c598be0901bd73fbd6b95c2df1bb95c8b1acf9245c84240b2a8498aa3f726d739488cff424066d4915155271cada0
6
+ metadata.gz: 16bde37399ef62404cbfc5ac487bb75aaecc42b5fa1b28297dcce92d83fe4c9f19dbb38a8732b2b2a9686a7efd710bcb537e12c93fc081b9423cb797704e6824
7
+ data.tar.gz: eda7e0e61f236749dbeabc34d0929e345a9f46d643030c705eabea2b811a7984f4143337d70594b2f663c84193cf0e4cdd52b87683f02f478bd73fab1b8e6467
data/lib/craigslister.rb CHANGED
@@ -1,8 +1,5 @@
1
- # Add Item#url
2
- # organize spec files
3
- # figure out how to run spec from any folder
4
-
5
- require 'mechanize'
1
+ require 'nokogiri'
2
+ require 'open-uri'
6
3
 
7
4
  class InvalidRangeError < StandardError
8
5
  end
@@ -11,31 +8,32 @@ class Craigslister
11
8
  attr_reader :area, :item, :high, :low, :results
12
9
 
13
10
  def initialize args
14
- @results = []
15
- @area = args.fetch(:area, 'sfbay')
16
- @item = args[:item]
17
- @high = args.fetch(:high, nil)
18
- @low = args.fetch(:low, nil)
11
+ @results = []
12
+ @area = args.fetch(:area, 'sfbay')
13
+ @item = args[:item]
14
+ @high = args.fetch(:high, nil)
15
+ @low = args.fetch(:low, nil)
19
16
  validate_price_range
20
-
21
- @mech = Mechanize.new
22
- configure_mech
23
17
  end
24
18
 
25
19
  def scrape!
26
- links.map {|link| get_data_from(link)}.compact
20
+ links.each_with_index {|link, index| get_data_from(link, index)}
21
+ results
27
22
  end
28
23
 
29
24
  def url
30
25
  "#{base_url}/"\
31
26
  "search/sss?sort=rel&"\
32
27
  "#{price_query}"\
33
- "query=#{item.downcase.split(' ') * '+'}"\
28
+ "query=#{item.downcase.split(' ') * '+'}"
34
29
  end
35
30
 
36
31
  def links
37
- @mech.get(url)
38
- @mech.page.search('.hdrlnk').map {|link| link['href']}
32
+ page = Nokogiri::HTML(open(url))
33
+ page.css('.hdrlnk').map do |link|
34
+ # formats out of town links, otherwise use base_url + link
35
+ link['href'] =~ /\w+\.craig/ ? "https:" + link['href'] : base_url + link['href']
36
+ end
39
37
  end
40
38
 
41
39
 
@@ -44,19 +42,19 @@ class Craigslister
44
42
  "https://#{area}.craigslist.org"
45
43
  end
46
44
 
47
- def get_data_from link
48
- @mech.get(link)
49
- @results << Item.new(scrape_item_data(link)) rescue p 'No Image'
45
+ def get_data_from link, index
46
+ page = Nokogiri::HTML(open(link))
47
+ @results << Item.new(scrape_item_data(page, link)) rescue puts "No image for post ##{index+1}"
50
48
  end
51
49
 
52
- def scrape_item_data url
50
+ def scrape_item_data page, url
53
51
  {
54
- image: @mech.page.images[0].src,
55
- title: @mech.page.at('span.postingtitletext').text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
56
- price: @mech.page.at('span.postingtitletext span.price').text.gsub(/\$/,'').to_i,
57
- location: @mech.page.at('span.postingtitletext small').text.gsub(/ ?[\(\)]/,''),
58
- description: @mech.page.at('section#postingbody').text,
59
- url: "#{base_url}#{url}"
52
+ image: page.at('img')['src'],
53
+ title: page.at('span.postingtitletext').text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
54
+ price: page.at('span.postingtitletext span.price').text.gsub(/\$/,'').to_i,
55
+ location: page.at('span.postingtitletext small').text.gsub(/ ?[\(\)]/,''),
56
+ description: page.at('section#postingbody').text,
57
+ url: url
60
58
  }
61
59
  end
62
60
 
@@ -67,13 +65,8 @@ class Craigslister
67
65
  result
68
66
  end
69
67
 
70
- def configure_mech
71
- @mech.robots = false
72
- @mech.user_agent_alias = 'Mac Safari'
73
- end
74
-
75
68
  def validate_price_range
76
- raise InvalidRangeError if (low && high) && low > high
69
+ raise InvalidRangeError if low && high && low > high
77
70
  end
78
71
  end
79
72
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: craigslister
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Scott
@@ -9,7 +9,27 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2015-10-27 00:00:00.000000000 Z
12
- dependencies: []
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.6.6.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.6'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.6.6.2
13
33
  description: all you need is an item title and you can scrape item objects from craigslist
14
34
  email: christo247@gmail.com
15
35
  executables: []