craigslister 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/craigslister.rb +51 -28
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e1d5d4462a643f2ffcc73f70e2fe18c26f2cd05
|
4
|
+
data.tar.gz: 784647815b81ed5fcb46cf717c57b3fe85a29058
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07101df2df31e3acf6b4d5b14d44c4d9ad00cf13b72d0d5ee6e3816a1f57cc2236ef8d501f6b380956b7d39c8c0c1eb345caf65735bf08e621a00bd7069798c0
|
7
|
+
data.tar.gz: 2db66d6ac5ccf8de5f25d6393b0d721f36307e2d008717de5dba6d2849799e75322089dda412347ae41ed2bea9f80c18a352d5b00358b8937be00a2c480c03dc
|
data/lib/craigslister.rb
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'open-uri'
|
3
3
|
|
4
|
+
|
4
5
|
class InvalidRangeError < StandardError
|
5
6
|
end
|
6
7
|
|
8
|
+
|
7
9
|
class Craigslister
|
8
|
-
attr_reader :area, :item, :high, :low
|
10
|
+
attr_reader :area, :item, :high, :low
|
9
11
|
|
10
12
|
def initialize args
|
11
|
-
@results = []
|
12
13
|
@area = args.fetch(:area, 'sfbay')
|
13
14
|
@item = args[:item]
|
14
15
|
@high = args.fetch(:high, nil)
|
@@ -17,23 +18,17 @@ class Craigslister
|
|
17
18
|
end
|
18
19
|
|
19
20
|
def scrape!
|
20
|
-
links.
|
21
|
-
results
|
21
|
+
links.map {|link| item_from(link)}.compact
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
|
26
|
-
"search/sss?sort=rel&"\
|
27
|
-
"#{price_query}"\
|
28
|
-
"query=#{item.downcase.split(' ') * '+'}"
|
24
|
+
def links
|
25
|
+
page_from(url).css('.hdrlnk').map {|link| format_link(link)}
|
29
26
|
end
|
30
27
|
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
link['href'] =~ /\w+\.craig/ ? "https:" + link['href'] : base_url + link['href']
|
36
|
-
end
|
28
|
+
def url
|
29
|
+
"#{base_url}/search/sss?sort=rel&"\
|
30
|
+
"#{price_query}query="\
|
31
|
+
"#{item.downcase.split(' ') * '+'}"
|
37
32
|
end
|
38
33
|
|
39
34
|
|
@@ -42,20 +37,12 @@ class Craigslister
|
|
42
37
|
"https://#{area}.craigslist.org"
|
43
38
|
end
|
44
39
|
|
45
|
-
def
|
46
|
-
|
47
|
-
@results << Item.new(scrape_item_data(page, link)) rescue puts "No image for post ##{index+1}"
|
40
|
+
def page_from url
|
41
|
+
Nokogiri::HTML(open(url))
|
48
42
|
end
|
49
43
|
|
50
|
-
def
|
51
|
-
|
52
|
-
image: page.at('img')['src'],
|
53
|
-
title: page.at('span.postingtitletext').text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
|
54
|
-
price: page.at('span.postingtitletext span.price').text.gsub(/\$/,'').to_i,
|
55
|
-
location: page.at('span.postingtitletext small').text.gsub(/ ?[\(\)]/,''),
|
56
|
-
description: page.at('section#postingbody').text,
|
57
|
-
url: url
|
58
|
-
}
|
44
|
+
def format_link link
|
45
|
+
link['href'] =~ /\w+\.craig/ ? "https:" + link['href'] : base_url + link['href']
|
59
46
|
end
|
60
47
|
|
61
48
|
def price_query
|
@@ -68,8 +55,45 @@ class Craigslister
|
|
68
55
|
def validate_price_range
|
69
56
|
raise InvalidRangeError if low && high && low > high
|
70
57
|
end
|
58
|
+
|
59
|
+
def item_from link
|
60
|
+
Item.new(get_item_data(page_from(link), link))
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_item_data page, link
|
64
|
+
{
|
65
|
+
image: scrape_image(page),
|
66
|
+
title: page.at('span.postingtitletext').text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
|
67
|
+
price: scrape_price(page),
|
68
|
+
location: scrape_location(page),
|
69
|
+
description: page.at('section#postingbody').text,
|
70
|
+
url: link
|
71
|
+
}
|
72
|
+
end
|
73
|
+
|
74
|
+
def scrape_image page
|
75
|
+
page.at('img') ? page.at('img')['src'] : ""
|
76
|
+
end
|
77
|
+
|
78
|
+
def scrape_price page
|
79
|
+
if price = page.at('span.postingtitletext span.price')
|
80
|
+
price.text.gsub(/\$/,'').to_i
|
81
|
+
else
|
82
|
+
0
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def scrape_location page
|
87
|
+
if location = page.at('span.postingtitletext small')
|
88
|
+
location.text.gsub(/ ?[\(\)]/,'')
|
89
|
+
else
|
90
|
+
""
|
91
|
+
end
|
92
|
+
end
|
71
93
|
end
|
72
94
|
|
95
|
+
|
96
|
+
|
73
97
|
class Item
|
74
98
|
attr_reader :title, :image, :price, :location, :url
|
75
99
|
|
@@ -81,4 +105,3 @@ class Item
|
|
81
105
|
@url = args[:url]
|
82
106
|
end
|
83
107
|
end
|
84
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: craigslister
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Scott
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -37,7 +37,7 @@ extensions: []
|
|
37
37
|
extra_rdoc_files: []
|
38
38
|
files:
|
39
39
|
- lib/craigslister.rb
|
40
|
-
homepage:
|
40
|
+
homepage: https://github.com/Yago580/craigslister
|
41
41
|
licenses:
|
42
42
|
- MIT
|
43
43
|
metadata: {}
|