craigslister 2.0.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e1d5d4462a643f2ffcc73f70e2fe18c26f2cd05
4
- data.tar.gz: 784647815b81ed5fcb46cf717c57b3fe85a29058
3
+ metadata.gz: 83f273dd97db5b9e9322851b3b69a5818194c39e
4
+ data.tar.gz: a53505d6d77de5be42ff950c88d9379289fb3e8a
5
5
  SHA512:
6
- metadata.gz: 07101df2df31e3acf6b4d5b14d44c4d9ad00cf13b72d0d5ee6e3816a1f57cc2236ef8d501f6b380956b7d39c8c0c1eb345caf65735bf08e621a00bd7069798c0
7
- data.tar.gz: 2db66d6ac5ccf8de5f25d6393b0d721f36307e2d008717de5dba6d2849799e75322089dda412347ae41ed2bea9f80c18a352d5b00358b8937be00a2c480c03dc
6
+ metadata.gz: 813c4dc83d6f84642167220e7f8f25a5bff5aef84bd491b12a8065de2ff20a49f4645743169853645ced314de9a73595d8788857ea43aa1e5977acd81582521e
7
+ data.tar.gz: 7cac96de0d2b7852b90d9d57ba4a045ab5641e3f36b2c4a264c8a9f26c7722a5af1c34e151f7ca1f82be2eee85aec91b9c1db1b8230ccaac7b12f827eb8b5b4e
@@ -0,0 +1,52 @@
1
+ # Thrown when low price is higher than high price
2
+ class InvalidRangeError < StandardError
3
+ end
4
+
5
+ # Creates url from arguments and scrapes
6
+ class Craigslister
7
+ attr_reader :area, :item, :high, :low
8
+
9
+ def initialize(args)
10
+ @area = args.fetch(:area, 'sfbay')
11
+ @item = args[:item]
12
+ @high = args.fetch(:high, nil)
13
+ @low = args.fetch(:low, nil)
14
+ validate_price_range
15
+ end
16
+
17
+ def scrape
18
+ scraper.scrape
19
+ end
20
+
21
+ def links
22
+ scraper.links
23
+ end
24
+
25
+ def url
26
+ "#{base_url}/search/sss?sort=rel&"\
27
+ "#{price_query}query="\
28
+ "#{item.downcase.split(' ') * '+'}"
29
+ end
30
+
31
+ private
32
+
33
+ def scraper
34
+ Scraper.new(url, base_url)
35
+ end
36
+
37
+ def base_url
38
+ "https://#{area}.craigslist.org"
39
+ end
40
+
41
+ def price_query
42
+ result = ''
43
+ result += "min_price=#{low}&" if low
44
+ result += "max_price=#{high}&" if high
45
+ result
46
+ end
47
+
48
+ def validate_price_range
49
+ return unless low && high && low > high
50
+ fail(InvalidRangeError, 'Price range is invalid.')
51
+ end
52
+ end
@@ -0,0 +1,13 @@
1
+ # Used for packaging Craigslist post data
2
+ class Post
3
+ attr_reader :title, :image, :price, :location, :url, :description
4
+
5
+ def initialize(args)
6
+ @title = args[:title]
7
+ @image = args[:image]
8
+ @price = args[:price]
9
+ @location = args[:location]
10
+ @description = args[:description]
11
+ @url = args[:url]
12
+ end
13
+ end
@@ -0,0 +1,49 @@
1
+ # Creates Post objects out of an HTML page
2
+ class PostScraper
3
+ def initialize(page, link)
4
+ @page = page
5
+ @link = link
6
+ end
7
+
8
+ def new_post
9
+ Post.new(
10
+ image: image,
11
+ title: title,
12
+ price: price,
13
+ location: location,
14
+ description: description,
15
+ url: link
16
+ )
17
+ end
18
+
19
+ private
20
+
21
+ attr_reader :page, :link
22
+
23
+ def posting_title
24
+ page.at('span.postingtitletext')
25
+ end
26
+
27
+ def image
28
+ image = page.at('img')
29
+ image ? image['src'] : ''
30
+ end
31
+
32
+ def title
33
+ posting_title.text.gsub(/ ?- ?\$\d+ ?\(.+\)/, '')
34
+ end
35
+
36
+ def price
37
+ price = posting_title.at('span.price')
38
+ price ? price.text.gsub(/\$/, '').to_i : 0
39
+ end
40
+
41
+ def location
42
+ location = posting_title.at('small')
43
+ location ? location.text.gsub(/ ?[\(\)]/, '') : ''
44
+ end
45
+
46
+ def description
47
+ page.at('section#postingbody').text
48
+ end
49
+ end
@@ -0,0 +1,39 @@
1
+ # Houses all higher level scraping logic
2
+ class Scraper
3
+ def initialize(url, base_url)
4
+ @url = url
5
+ @base_url = base_url
6
+ end
7
+
8
+ def links
9
+ header_link.map { |link| format_link(link['href']) }
10
+ end
11
+
12
+ def scrape
13
+ links.flat_map { |link| post_from(link) }
14
+ end
15
+
16
+ private
17
+
18
+ attr_reader :url, :base_url
19
+
20
+ def page_from(url)
21
+ Nokogiri::HTML(open(url))
22
+ end
23
+
24
+ def post_from(link)
25
+ PostScraper.new(page_from(link), link).new_post
26
+ end
27
+
28
+ def header_link
29
+ page_from(url).css('.hdrlnk')
30
+ end
31
+
32
+ def format_link(link)
33
+ if link =~ /\w+\.craig/
34
+ 'https:' + link
35
+ else
36
+ base_url + link
37
+ end
38
+ end
39
+ end
data/lib/craigslister.rb CHANGED
@@ -1,107 +1,6 @@
1
1
  require 'nokogiri'
2
2
  require 'open-uri'
3
-
4
-
5
- class InvalidRangeError < StandardError
6
- end
7
-
8
-
9
- class Craigslister
10
- attr_reader :area, :item, :high, :low
11
-
12
- def initialize args
13
- @area = args.fetch(:area, 'sfbay')
14
- @item = args[:item]
15
- @high = args.fetch(:high, nil)
16
- @low = args.fetch(:low, nil)
17
- validate_price_range
18
- end
19
-
20
- def scrape!
21
- links.map {|link| item_from(link)}.compact
22
- end
23
-
24
- def links
25
- page_from(url).css('.hdrlnk').map {|link| format_link(link)}
26
- end
27
-
28
- def url
29
- "#{base_url}/search/sss?sort=rel&"\
30
- "#{price_query}query="\
31
- "#{item.downcase.split(' ') * '+'}"
32
- end
33
-
34
-
35
- private
36
- def base_url
37
- "https://#{area}.craigslist.org"
38
- end
39
-
40
- def page_from url
41
- Nokogiri::HTML(open(url))
42
- end
43
-
44
- def format_link link
45
- link['href'] =~ /\w+\.craig/ ? "https:" + link['href'] : base_url + link['href']
46
- end
47
-
48
- def price_query
49
- result = ''
50
- result += "min_price=#{low}&" if low
51
- result += "max_price=#{high}&" if high
52
- result
53
- end
54
-
55
- def validate_price_range
56
- raise InvalidRangeError if low && high && low > high
57
- end
58
-
59
- def item_from link
60
- Item.new(get_item_data(page_from(link), link))
61
- end
62
-
63
- def get_item_data page, link
64
- {
65
- image: scrape_image(page),
66
- title: page.at('span.postingtitletext').text.gsub(/ ?- ?\$\d+ ?\(.+\)/, ''),
67
- price: scrape_price(page),
68
- location: scrape_location(page),
69
- description: page.at('section#postingbody').text,
70
- url: link
71
- }
72
- end
73
-
74
- def scrape_image page
75
- page.at('img') ? page.at('img')['src'] : ""
76
- end
77
-
78
- def scrape_price page
79
- if price = page.at('span.postingtitletext span.price')
80
- price.text.gsub(/\$/,'').to_i
81
- else
82
- 0
83
- end
84
- end
85
-
86
- def scrape_location page
87
- if location = page.at('span.postingtitletext small')
88
- location.text.gsub(/ ?[\(\)]/,'')
89
- else
90
- ""
91
- end
92
- end
93
- end
94
-
95
-
96
-
97
- class Item
98
- attr_reader :title, :image, :price, :location, :url
99
-
100
- def initialize args
101
- @title = args[:title]
102
- @image = args[:image]
103
- @price = args[:price]
104
- @location = args[:location]
105
- @url = args[:url]
106
- end
107
- end
3
+ require 'craigslister/scraper'
4
+ require 'craigslister/post_scraper'
5
+ require 'craigslister/post'
6
+ require 'craigslister/craigslister'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: craigslister
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Scott
@@ -30,13 +30,17 @@ dependencies:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
32
  version: 1.6.6.2
33
- description: all you need is an item title and you can scrape item objects from craigslist
33
+ description: all you need is an item title and you can scrape posts from craigslist
34
34
  email: christo247@gmail.com
35
35
  executables: []
36
36
  extensions: []
37
37
  extra_rdoc_files: []
38
38
  files:
39
39
  - lib/craigslister.rb
40
+ - lib/craigslister/craigslister.rb
41
+ - lib/craigslister/post.rb
42
+ - lib/craigslister/post_scraper.rb
43
+ - lib/craigslister/scraper.rb
40
44
  homepage: https://github.com/Yago580/craigslister
41
45
  licenses:
42
46
  - MIT
@@ -57,8 +61,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
61
  version: '0'
58
62
  requirements: []
59
63
  rubyforge_project:
60
- rubygems_version: 2.4.5
64
+ rubygems_version: 2.4.5.1
61
65
  signing_key:
62
66
  specification_version: 4
63
- summary: Scrape Craigslist for item objects
67
+ summary: Scrape Craigslist for Posts
64
68
  test_files: []