craiggy 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6ddd12ddb0d4c32992b841a92965ee0fca1234fb
4
+ data.tar.gz: b45d8d31c309b16e26ee0751cb78097b215e09fd
5
+ SHA512:
6
+ metadata.gz: 1d9dbbf75ef9b0373214ab58fe3b7c6b02298428ad15bf80b39f3c56b4a30327cf1dfdcdeba91325666298921f1f74fd496c7a05ccbedef5a57d03a51ef17c66
7
+ data.tar.gz: ffbc5d0af69dec30fb7b1e2ae4d976ead60bdd76794f9e86f326de6c753ed1466e00e07c4d51f445e3c87bc0467531eff0e43451ffc748c051c13d242062056c
@@ -0,0 +1,42 @@
1
+ require "mechanize"
2
+
3
+ require "craiggy/post"
4
+ require "craiggy/post_url"
5
+ require "craiggy/categories"
6
+
7
# Entry point for scraping Craigslist search results.
#
# `Craiggy.get` searches the category named by `CATEGORY`; a subclass that
# defines its own `CATEGORY` constant searches that category instead, because
# the constant is resolved through `self::CATEGORY`.
class Craiggy
  CATEGORY = "sss"

  # Searches Craigslist for `item` and scrapes every matching post.
  #
  # The search URL is kept in a local variable rather than in class-level
  # state, so consecutive or concurrent calls cannot observe each other's URL.
  #
  # @param item [String] search terms
  # @param arguments [Array] optional extra arguments forwarded unchanged to
  #   `PostUrl.new` (an options hash such as `area:`, `min_price:`, `max_price:`)
  # @return [Array<Post>] one `Post` per matching result link
  def self.get(item, *arguments)
    search_url = PostUrl.new(item, self::CATEGORY, *arguments).url
    matching_links(search_url).map { |link| Post.new(link.click) }
  end

  # @return [Array<String>] the keys of the `CATEGORIES` constant
  def self.categories
    self::CATEGORIES.keys
  end

  class << self
    private

    # Fetches the search results page and returns its result links.
    def matching_links(search_url)
      mech.get(search_url).links_with(dom_class: "result-title hdrlnk")
    end

    # Builds a fresh Mechanize agent with robots.txt handling turned off and
    # the "Mac Safari" user-agent alias.
    def mech
      Mechanize.new do |agent|
        agent.robots = false
        agent.user_agent_alias = "Mac Safari"
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
# Maps each category name to the code used in the Craigslist search path.
class Craiggy
  CATEGORIES = [
    %w[Community ccc],
    %w[Events eee],
    %w[ForSale sss],
    %w[Gigs ggg],
    %w[Housing hhh],
    %w[Jobs jjj],
    %w[Personals ppp],
    %w[Resumes rrr],
    %w[Services bbb],
  ].to_h.freeze
end

# Define one Craiggy subclass per category (e.g. Craiggy::Jobs), each
# carrying its own CATEGORY code.
Craiggy::CATEGORIES.each_pair do |class_name, code|
  category_class = Class.new(Craiggy)
  category_class.const_set(:CATEGORY, code)
  Craiggy.const_set(class_name, category_class)
end
@@ -0,0 +1,70 @@
1
+ # Used for packaging Craigslist post data
2
+ class Craiggy
3
# Read-only container for the fields scraped from a single post page.
class Post
  attr_reader :title, :image, :price, :location, :url, :description

  # Scrapes `page` through `PostScraper` and copies each scraped field into
  # the matching reader.
  #
  # @param page [Object] the fetched post page, handed to `PostScraper.new`
  def initialize(page)
    post_params = PostScraper.new(page).post_params
    @title = post_params[:title]
    @image = post_params[:image]
    @price = post_params[:price]
    @location = post_params[:location]
    @description = post_params[:description]
    @url = post_params[:url]
  end

  # @return [Boolean] true only when a non-empty image value was scraped;
  #   a nil image counts as "no image", the same as the empty string
  def has_image?
    !image.to_s.empty?
  end
end
20
+
21
+
22
# Scrapes and formats `post_params` for easy `Post` initialization.
#
# Every field falls back to a neutral default ('' or 0) when the node it is
# read from is missing, so a page with unexpected markup yields a sparse
# result instead of raising NoMethodError.
class PostScraper
  # @param page [Object] a fetched post page responding to `at(selector)` and
  #   `uri` (presumably a Mechanize page; verify against the caller)
  def initialize(page)
    @page = page
  end

  # @return [Hash{Symbol => Object}] the keys :image, :title, :price,
  #   :location, :description and :url; :price is an Integer, the rest Strings
  def post_params
    {
      image: image,
      title: title,
      price: price,
      location: location,
      description: description,
      url: page.uri.to_s,
    }
  end

  private

  attr_reader :page

  # The heading node that the price and location are read from; nil if absent.
  def posting_title
    page.at('span.postingtitletext')
  end

  # `src` of the first <img> on the page, or '' when there is no image or the
  # image has no `src` attribute.
  def image
    img = page.at('img')
    img ? img['src'].to_s : ''
  end

  def title
    text_of(page.at("#titletextonly"))
  end

  # Price as an Integer, 0 when no price node is present.
  def price
    heading = posting_title
    node = heading && heading.at('span.price')
    # Drop "$" and thousands separators first: "1,200".to_i would stop at the
    # comma and return 1.
    node ? node.text.delete('$,').to_i : 0
  end

  # Location text with its surrounding parentheses removed, '' when absent.
  def location
    heading = posting_title
    node = heading && heading.at('small')
    node ? node.text.gsub(/ ?[\(\)]/, '') : ''
  end

  # Text of the third child node of the posting body, stripped.
  # NOTE(review): presumably the first two children are page boilerplate that
  # precedes the user's text -- confirm against current Craigslist markup.
  def description
    body = page.at('section#postingbody')
    text_of(body && body.children[2]).strip
  end

  # Text of `node`, or '' when the node is missing.
  def text_of(node)
    node ? node.text : ''
  end
end
70
+ end
@@ -0,0 +1,41 @@
1
+ class Craiggy
2
# URI is needed for query-string escaping; required here so this file does
# not depend on another library having loaded it first.
require "uri"

# Builds the Craigslist search URL for an item within a category.
class PostUrl
  # @param item [String] search terms; case and repeated whitespace are
  #   normalised when the URL is built
  # @param category [String] category code placed in the search path
  # @param options [Array] only the first element is used: an optional Hash
  #   with the symbol keys :area, :min_price and :max_price
  def initialize(item, category, *options)
    @item = item
    @category = category
    @options = options.first || {}
  end

  # @return [String] the search URL. Query values are form-encoded, so terms
  #   containing characters such as "&", "#" or "=" cannot corrupt the query.
  def url
    "#{base_url}/search/#{category}?#{URI.encode_www_form(query_params)}"
  end

  private

  attr_reader :item, :category, :options

  def base_url
    "https://#{area}.craigslist.org"
  end

  # Ordered [name, value] pairs: sort, the optional price bounds, then query.
  def query_params
    params = [["sort", "rel"]]
    params << ["min_price", min_price] unless min_price.nil?
    params << ["max_price", max_price] unless max_price.nil?
    params << ["query", search_terms]
  end

  # Lower-cased terms separated by single spaces; form encoding turns each
  # space into "+".
  def search_terms
    item.to_s.downcase.split(' ').join(' ')
  end

  def area
    options[:area] || "sfbay"
  end

  def min_price
    options[:min_price]
  end

  def max_price
    options[:max_price]
  end
end
41
+ end
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: craiggy
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Scott
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-01-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.4'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: webmock
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.22'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.22'
55
+ - !ruby/object:Gem::Dependency
56
+ name: vcr
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ description: All you need is an item title and Craiggy will scrape Craigslist and
70
+ return an array of Posts pertaining to the item.
71
+ email: christo247@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - lib/craiggy.rb
77
+ - lib/craiggy/categories.rb
78
+ - lib/craiggy/post.rb
79
+ - lib/craiggy/post_url.rb
80
+ homepage: https://github.com/steezyduzit/craiggy
81
+ licenses:
82
+ - MIT
83
+ metadata: {}
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project:
100
+ rubygems_version: 2.5.1
101
+ signing_key:
102
+ specification_version: 4
103
+ summary: Scrape posts from Craigslist with an easy API
104
+ test_files: []