craigslister 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/craigslister.rb +26 -33
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d677df8e37c7dd139665e441aa6581a34ad5b5ed
|
4
|
+
data.tar.gz: 93442d5a1e4fbbd4f9500f42f53e17322cda7d18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16bde37399ef62404cbfc5ac487bb75aaecc42b5fa1b28297dcce92d83fe4c9f19dbb38a8732b2b2a9686a7efd710bcb537e12c93fc081b9423cb797704e6824
|
7
|
+
data.tar.gz: eda7e0e61f236749dbeabc34d0929e345a9f46d643030c705eabea2b811a7984f4143337d70594b2f663c84193cf0e4cdd52b87683f02f478bd73fab1b8e6467
|
data/lib/craigslister.rb
CHANGED
@@ -1,8 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# figure out how to run spec from any folder
|
4
|
-
|
5
|
-
require 'mechanize'
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
6
3
|
|
7
4
|
class InvalidRangeError < StandardError
|
8
5
|
end
|
@@ -11,31 +8,32 @@ class Craigslister
|
|
11
8
|
attr_reader :area, :item, :high, :low, :results
|
12
9
|
|
13
10
|
def initialize args
|
14
|
-
@results
|
15
|
-
@area
|
16
|
-
@item
|
17
|
-
@high
|
18
|
-
@low
|
11
|
+
@results = []
|
12
|
+
@area = args.fetch(:area, 'sfbay')
|
13
|
+
@item = args[:item]
|
14
|
+
@high = args.fetch(:high, nil)
|
15
|
+
@low = args.fetch(:low, nil)
|
19
16
|
validate_price_range
|
20
|
-
|
21
|
-
@mech = Mechanize.new
|
22
|
-
configure_mech
|
23
17
|
end
|
24
18
|
|
25
19
|
def scrape!
|
26
|
-
links.
|
20
|
+
links.each_with_index {|link, index| get_data_from(link, index)}
|
21
|
+
results
|
27
22
|
end
|
28
23
|
|
29
24
|
def url
|
30
25
|
"#{base_url}/"\
|
31
26
|
"search/sss?sort=rel&"\
|
32
27
|
"#{price_query}"\
|
33
|
-
"query=#{item.downcase.split(' ') * '+'}"
|
28
|
+
"query=#{item.downcase.split(' ') * '+'}"
|
34
29
|
end
|
35
30
|
|
36
31
|
def links
|
37
|
-
|
38
|
-
|
32
|
+
page = Nokogiri::HTML(open(url))
|
33
|
+
page.css('.hdrlnk').map do |link|
|
34
|
+
# formats out of town links, otherwise use base_url + link
|
35
|
+
link['href'] =~ /\w+\.craig/ ? "https:" + link['href'] : base_url + link['href']
|
36
|
+
end
|
39
37
|
end
|
40
38
|
|
41
39
|
|
@@ -44,19 +42,19 @@ class Craigslister
|
|
44
42
|
"https://#{area}.craigslist.org"
|
45
43
|
end
|
46
44
|
|
47
|
-
def get_data_from link
|
48
|
-
|
49
|
-
@results << Item.new(scrape_item_data(link)) rescue
|
45
|
+
def get_data_from link, index
|
46
|
+
page = Nokogiri::HTML(open(link))
|
47
|
+
@results << Item.new(scrape_item_data(page, link)) rescue puts "No image for post ##{index+1}"
|
50
48
|
end
|
51
49
|
|
52
|
-
def scrape_item_data url
|
50
|
+
def scrape_item_data page, url
|
53
51
|
{
|
54
|
-
image:
|
55
|
-
title:
|
56
|
-
price:
|
57
|
-
location:
|
58
|
-
description:
|
59
|
-
url:
|
52
|
+
image: page.at('img')['src'],
|
53
|
+
title: page.at('span.postingtitletext').text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
|
54
|
+
price: page.at('span.postingtitletext span.price').text.gsub(/\$/,'').to_i,
|
55
|
+
location: page.at('span.postingtitletext small').text.gsub(/ ?[\(\)]/,''),
|
56
|
+
description: page.at('section#postingbody').text,
|
57
|
+
url: url
|
60
58
|
}
|
61
59
|
end
|
62
60
|
|
@@ -67,13 +65,8 @@ class Craigslister
|
|
67
65
|
result
|
68
66
|
end
|
69
67
|
|
70
|
-
def configure_mech
|
71
|
-
@mech.robots = false
|
72
|
-
@mech.user_agent_alias = 'Mac Safari'
|
73
|
-
end
|
74
|
-
|
75
68
|
def validate_price_range
|
76
|
-
raise InvalidRangeError if
|
69
|
+
raise InvalidRangeError if low && high && low > high
|
77
70
|
end
|
78
71
|
end
|
79
72
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: craigslister
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Scott
|
@@ -9,7 +9,27 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2015-10-27 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.6.6.2
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.6'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.6.6.2
|
13
33
|
description: all you need is an item title and you can scrape item objects from craigslist
|
14
34
|
email: christo247@gmail.com
|
15
35
|
executables: []
|