craiggy 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6ddd12ddb0d4c32992b841a92965ee0fca1234fb
4
+ data.tar.gz: b45d8d31c309b16e26ee0751cb78097b215e09fd
5
+ SHA512:
6
+ metadata.gz: 1d9dbbf75ef9b0373214ab58fe3b7c6b02298428ad15bf80b39f3c56b4a30327cf1dfdcdeba91325666298921f1f74fd496c7a05ccbedef5a57d03a51ef17c66
7
+ data.tar.gz: ffbc5d0af69dec30fb7b1e2ae4d976ead60bdd76794f9e86f326de6c753ed1466e00e07c4d51f445e3c87bc0467531eff0e43451ffc748c051c13d242062056c
@@ -0,0 +1,42 @@
1
+ require "mechanize"
2
+
3
+ require "craiggy/post"
4
+ require "craiggy/post_url"
5
+ require "craiggy/categories"
6
+
7
# Entry point for scraping Craigslist search results.
#
# `Craiggy.get` searches the category named by the receiver's `CATEGORY`
# constant; a subclass that defines its own `CATEGORY` searches that one.
class Craiggy
  # Category code used when `get` is called on Craiggy itself.
  CATEGORY = "sss"

  # Searches Craigslist for `item` and builds a Post for every result link.
  #
  # @param item [String] the search text
  # @param arguments [Array] forwarded unchanged to PostUrl.new (its first
  #   element is used there as the options hash)
  # @return [Array<Post>] one Post per matching result link
  def self.get(item, *arguments)
    # The URL is kept in a local and handed down explicitly. Storing it in an
    # instance variable on the class object would leak state between calls
    # (and between threads) and would require shadowing Class#initialize.
    search_url = PostUrl.new(item, self::CATEGORY, *arguments).url
    scrape_posts(search_url)
  end

  # @return [Array<String>] the keys of the CATEGORIES mapping
  def self.categories
    self::CATEGORIES.keys
  end

  class << self
    private

    # Follows every result link on the search page and wraps each fetched
    # page in a Post.
    def scrape_posts(search_url)
      matching_links(search_url).map { |link| Post.new(link.click) }
    end

    # Fetches the search page and selects the links whose class attribute
    # is exactly "result-title hdrlnk".
    def matching_links(search_url)
      mech.get(search_url).links_with(dom_class: "result-title hdrlnk")
    end

    # Builds a fresh agent that ignores robots.txt and identifies itself
    # with the "Mac Safari" user-agent alias.
    def mech
      Mechanize.new do |agent|
        agent.robots = false
        agent.user_agent_alias = "Mac Safari"
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
class Craiggy
  # Frozen mapping from a category's class name to the Craigslist category
  # code used in search URLs. Written as name/code pairs, one per line.
  CATEGORIES = [
    %w[Community ccc],
    %w[Events eee],
    %w[ForSale sss],
    %w[Gigs ggg],
    %w[Housing hhh],
    %w[Jobs jjj],
    %w[Personals ppp],
    %w[Resumes rrr],
    %w[Services bbb],
  ].to_h.freeze
end
14
+
15
# Defines one subclass of Craiggy per CATEGORIES entry (e.g. the "ForSale"
# key becomes Craiggy::ForSale), each carrying its own CATEGORY code.
Craiggy::CATEGORIES.each_pair do |class_name, code|
  category_class = Class.new(Craiggy)
  category_class.const_set(:CATEGORY, code)
  Craiggy.const_set(class_name, category_class)
end
@@ -0,0 +1,70 @@
1
class Craiggy
  # Packages the data scraped from a single Craigslist post page.
  class Post
    attr_reader :title, :image, :price, :location, :url, :description

    # @param page [#at, #uri] the fetched post page; presumably a
    #   Mechanize::Page, but only `at` and `uri` are required here
    def initialize(page)
      post_params = PostScraper.new(page).post_params
      @title = post_params[:title]
      @image = post_params[:image]
      @price = post_params[:price]
      @location = post_params[:location]
      @description = post_params[:description]
      @url = post_params[:url]
    end

    # @return [Boolean] true when an image URL was scraped for this post
    def has_image?
      # to_s keeps a nil image from being reported as present.
      !image.to_s.empty?
    end
  end

  # Scrapes and formats `post_params` for easy `Post` initialization.
  # Missing page elements yield neutral values ('' or 0) instead of raising.
  class PostScraper
    def initialize(page)
      @page = page
    end

    # @return [Hash{Symbol => Object}] :image, :title, :location,
    #   :description and :url as Strings, :price as an Integer
    def post_params
      {
        image: image,
        title: title,
        price: price,
        location: location,
        description: description,
        url: page.uri.to_s,
      }
    end

    private

    attr_reader :page

    # The title span that holds the price and location nodes; nil if absent.
    def posting_title
      page.at('span.postingtitletext')
    end

    # Text of the first node matching `selector` under `node`, or '' when
    # either `node` or the match is missing.
    def text_of(node, selector)
      return '' unless node

      match = node.at(selector)
      match ? match.text : ''
    end

    # `src` of the first <img> on the page, or '' when there is none.
    def image
      img = page.at('img')
      img ? img['src'].to_s : ''
    end

    def title
      text_of(page, '#titletextonly')
    end

    # Integer price; 0 when no price is shown.
    def price
      # Drop the thousands separator as well as the dollar sign, otherwise
      # "$1,200".to_i would stop at the comma and yield 1.
      text_of(posting_title, 'span.price').delete('$,').to_i
    end

    # Location text with its surrounding parentheses removed.
    def location
      text_of(posting_title, 'small').gsub(/ ?[\(\)]/, '')
    end

    # Stripped text of the third child node of the posting body.
    # NOTE(review): presumably the first two children are leading whitespace
    # and a notice element - confirm against a live page.
    def description
      body = page.at('section#postingbody')
      return '' unless body

      node = body.children[2]
      node ? node.text.strip : ''
    end
  end
end
@@ -0,0 +1,41 @@
1
require "uri"

class Craiggy
  # Builds the Craigslist search URL for an item within a category.
  class PostUrl
    # @param item [String] the search text
    # @param category [String] Craigslist category code, e.g. "sss"
    # @param options [Array<Hash>] only the first element is used; it may
    #   carry :area, :min_price and :max_price
    def initialize(item, category, *options)
      @item = item
      @category = category
      @options = options.first || {}
    end

    # @return [String] the full search URL, with any price filters placed
    #   before the query parameter
    def url
      "#{base_url}/search/#{category}?sort=rel&#{price_range}query=#{encoded_item}"
    end

    private

    attr_reader :item, :category, :options

    def base_url
      "https://#{area}.craigslist.org"
    end

    # Lower-cased search words joined with "+". Each word is form-encoded so
    # characters such as "&" or "#" cannot break out of the query parameter.
    def encoded_item
      item.downcase.split(' ').map { |word| URI.encode_www_form_component(word) }.join('+')
    end

    # "min_price=..&max_price=..&" for whichever limits were supplied, or ""
    # when neither was.
    def price_range
      { min_price: min_price, max_price: max_price }
        .reject { |_name, value| value.nil? }
        .map { |name, value| "#{name}=#{URI.encode_www_form_component(value.to_s)}&" }
        .join
    end

    # Craigslist subdomain to search; "sfbay" unless :area was given.
    def area
      options[:area] || "sfbay"
    end

    def min_price
      options[:min_price]
    end

    def max_price
      options[:max_price]
    end
  end
end
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: craiggy
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Scott
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-01-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.4'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: webmock
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.22'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.22'
55
+ - !ruby/object:Gem::Dependency
56
+ name: vcr
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ description: All you need is an item title and Craiggy will scrape Craigslist and
70
+ return an array of Posts pertaining to the item.
71
+ email: christo247@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - lib/craiggy.rb
77
+ - lib/craiggy/categories.rb
78
+ - lib/craiggy/post.rb
79
+ - lib/craiggy/post_url.rb
80
+ homepage: https://github.com/steezyduzit/craiggy
81
+ licenses:
82
+ - MIT
83
+ metadata: {}
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project:
100
+ rubygems_version: 2.5.1
101
+ signing_key:
102
+ specification_version: 4
103
+ summary: Scrape posts from Craigslist with an easy API
104
+ test_files: []