craiggy 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/craiggy.rb +42 -0
- data/lib/craiggy/categories.rb +20 -0
- data/lib/craiggy/post.rb +70 -0
- data/lib/craiggy/post_url.rb +41 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6ddd12ddb0d4c32992b841a92965ee0fca1234fb
|
4
|
+
data.tar.gz: b45d8d31c309b16e26ee0751cb78097b215e09fd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1d9dbbf75ef9b0373214ab58fe3b7c6b02298428ad15bf80b39f3c56b4a30327cf1dfdcdeba91325666298921f1f74fd496c7a05ccbedef5a57d03a51ef17c66
|
7
|
+
data.tar.gz: ffbc5d0af69dec30fb7b1e2ae4d976ead60bdd76794f9e86f326de6c753ed1466e00e07c4d51f445e3c87bc0467531eff0e43451ffc748c051c13d242062056c
|
data/lib/craiggy.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require "mechanize"
|
2
|
+
|
3
|
+
require "craiggy/post"
|
4
|
+
require "craiggy/post_url"
|
5
|
+
require "craiggy/categories"
|
6
|
+
|
7
|
+
# Entry point for scraping Craigslist search results.
#
# `Craiggy.get` searches using this class's CATEGORY code; subclasses that
# define their own CATEGORY constant search that category instead, because
# the constant is looked up through `self`.
class Craiggy
  # Category code used when `get` is called on Craiggy itself. It equals the
  # "ForSale" code listed in CATEGORIES.
  CATEGORY = "sss"

  class << self
    # Searches Craigslist and returns one Post per matching result link.
    #
    # @param item [String] search terms
    # @param arguments [Array] forwarded unchanged to PostUrl.new; PostUrl
    #   reads an optional hash with :area, :min_price and :max_price
    # @return [Array<Craiggy::Post>]
    def get(item, *arguments)
      # The URL is a local value handed down the call chain rather than an
      # instance variable on the class, so nothing leaks between calls and
      # no singleton method named `initialize` is needed.
      search_url = PostUrl.new(item, self::CATEGORY, *arguments).url
      scrape_posts(search_url)
    end

    # @return [Array<String>] the category names (keys of CATEGORIES)
    def categories
      self::CATEGORIES.keys
    end

    private

    # Follows every matching result link and wraps the fetched page in a Post.
    def scrape_posts(search_url)
      matching_links(search_url).map { |link| Post.new(link.click) }
    end

    # Fetches the search page and selects links whose class attribute is
    # "result-title hdrlnk".
    def matching_links(search_url)
      mech.get(search_url).links_with(dom_class: "result-title hdrlnk")
    end

    # Builds a new Mechanize agent with robots handling switched off and the
    # "Mac Safari" user-agent alias.
    def mech
      Mechanize.new do |agent|
        agent.robots = false
        agent.user_agent_alias = "Mac Safari"
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class Craiggy
  # Category name => Craigslist search code. The names are later used as
  # constant names for the generated Craiggy subclasses.
  CATEGORIES = [
    %w[Community ccc],
    %w[Events eee],
    %w[ForSale sss],
    %w[Gigs ggg],
    %w[Housing hhh],
    %w[Jobs jjj],
    %w[Personals ppp],
    %w[Resumes rrr],
    %w[Services bbb],
  ].to_h.freeze
end
|
14
|
+
|
15
|
+
# Define one Craiggy subclass per CATEGORIES entry (for example
# Craiggy::Jobs), each carrying its own CATEGORY search code.
Craiggy::CATEGORIES.each_pair do |class_name, code|
  category_class = Class.new(Craiggy)
  category_class.const_set(:CATEGORY, code)
  Craiggy.const_set(class_name, category_class)
end
|
data/lib/craiggy/post.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
class Craiggy
  # Used for packaging Craigslist post data
  class Post
    attr_reader :title, :image, :price, :location, :url, :description

    # @param page a fetched post page; it is handed to PostScraper, which
    #   calls `at` and `uri` on it
    def initialize(page)
      post_params = PostScraper.new(page).post_params
      @title = post_params[:title]
      @image = post_params[:image]
      @price = post_params[:price]
      @location = post_params[:location]
      @description = post_params[:description]
      @url = post_params[:url]
    end

    # @return [Boolean] false when the scraper found no image (image is "")
    def has_image?
      image != ""
    end
  end

  # Scrapes and formats `post_params` for easy `Post` initialization
  class PostScraper
    def initialize(page)
      @page = page
    end

    # @return [Hash] :image, :title, :price, :location, :description and :url.
    #   Elements missing from the page yield '' (or 0 for :price) instead of
    #   raising NoMethodError.
    def post_params
      {
        image: image,
        title: title,
        price: price,
        location: location,
        description: description,
        url: page.uri.to_s,
      }
    end

    private

    attr_reader :page

    # Looks up `selector` under `node`; nil when `node` itself is nil or
    # nothing matches.
    def find(node, selector)
      node && node.at(selector)
    end

    def posting_title
      page.at('span.postingtitletext')
    end

    # `src` of the first <img> on the page, or '' when there is none.
    def image
      img = page.at('img')
      img ? img['src'] : ''
    end

    # Text of #titletextonly, or '' when the element is absent.
    def title
      heading = page.at("#titletextonly")
      heading ? heading.text : ''
    end

    # Integer price from span.price inside the posting title, 0 when absent.
    def price
      price_tag = find(posting_title, 'span.price')
      # Drop "$" and thousands separators so "$1,200" parses as 1200, not 1.
      price_tag ? price_tag.text.delete('$,').to_i : 0
    end

    # Text of the <small> element inside the posting title with parentheses
    # (and a space directly before them) removed; '' when absent.
    def location
      place = find(posting_title, 'small')
      place ? place.text.gsub(/ ?[\(\)]/, '') : ''
    end

    # Stripped text of the third child node of section#postingbody, or ''
    # when the section or that child is missing.
    # NOTE(review): the fixed index presumably skips leading whitespace and
    # notice nodes; confirm against current Craigslist markup.
    def description
      body = page.at('section#postingbody')
      text_node = body && body.children[2]
      text_node ? text_node.text.strip : ''
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require "uri"

class Craiggy
  # Builds the Craigslist search URL for an item within a category.
  class PostUrl
    # @param item [String] search terms; downcased and split on whitespace
    # @param category [String] search code placed after /search/
    # @param options [Array] only the first element is used: an optional
    #   hash with :area (defaults to "sfbay"), :min_price and :max_price
    def initialize(item, category, *options)
      @item = item
      @category = category
      @options = options.first || {}
    end

    # @return [String] the full search URL, sorted by relevance
    def url
      "#{base_url}/search/#{category}?sort=rel&#{price_range}query=#{query_terms}"
    end

    private

    attr_reader :item, :category, :options

    def base_url
      "https://#{area}.craigslist.org"
    end

    # Search words joined with "+". Each word is form-encoded so characters
    # such as "&", "#" or "=" cannot break out of the query parameter.
    def query_terms
      item.downcase.split(' ').map { |word| URI.encode_www_form_component(word) }.join('+')
    end

    # "min_price=..&" and/or "max_price=..&" for the bounds that were given,
    # or "" when neither is set.
    def price_range
      { min_price: min_price, max_price: max_price }
        .reject { |_name, value| value.nil? }
        .map { |name, value| "#{name}=#{URI.encode_www_form_component(value)}&" }
        .join
    end

    def area
      options[:area] || "sfbay"
    end

    def min_price
      options[:min_price]
    end

    def max_price
      options[:max_price]
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: craiggy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Scott
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-01-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.7'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.4'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.4'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: webmock
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.22'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.22'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: vcr
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
description: All you need is an item title and Craiggy will scrape Craigslist and
|
70
|
+
return an array of Posts pertaining to the item.
|
71
|
+
email: christo247@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- lib/craiggy.rb
|
77
|
+
- lib/craiggy/categories.rb
|
78
|
+
- lib/craiggy/post.rb
|
79
|
+
- lib/craiggy/post_url.rb
|
80
|
+
homepage: https://github.com/steezyduzit/craiggy
|
81
|
+
licenses:
|
82
|
+
- MIT
|
83
|
+
metadata: {}
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubyforge_project:
|
100
|
+
rubygems_version: 2.5.1
|
101
|
+
signing_key:
|
102
|
+
specification_version: 4
|
103
|
+
summary: Scrape posts from Craigslist with an easy API
|
104
|
+
test_files: []
|