crags 1.6.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,2 +1,4 @@
1
1
  pkg/
2
- .DS_Store
2
+ .DS_Store
3
+ .bundle
4
+ coverage
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem "curb"
4
+ gem "hpricot"
5
+
6
+ group :test do
7
+ gem "rspec"
8
+ end
data/README.rdoc CHANGED
@@ -4,18 +4,55 @@ A library to help search across multiple craigslist locations.
4
4
 
5
5
  == Usage
6
6
 
7
- searches all available craigslist sites for a keyword in "for sale".
8
- search(keyword)
7
+ There are two ways to search craigslist using crags. You can search by keyword
8
+ in a single location scoped to a category. You can also search by keyword
9
+ across a country scoped by category. Each search type also has a set of
10
+ configurable options.
9
11
 
10
- search also takes a category
11
- search(keyword, 'sss')
12
+ === Single Location Search
12
13
 
13
- There are a bunch of helpful api methods, check the tests for more info.
14
+ Options:
15
+ :keyword
16
+ default: bicycle (because bicycles are awesome)
17
+ :location
18
+ default: sfbay.craigslist.org (because that is where it all began)
19
+ :category
20
+ default: for sale (because buying another bike is always a good idea)
14
21
 
15
- == Requirements
22
+ Locations and Categories are full-on Ruby objects. You can get all of the
23
+ Categories using:
24
+ Category.all
16
25
 
17
- * hpricot
18
- * curb
26
+ Search a single craigslist location (e.g. sfbay.craigslist.org)
27
+ s = Search::Location.new(:keyword => 'shoes')
28
+ s.keyword # this will be 'shoes' instead of the default
29
+ s.location # this will be the sfbay Location
30
+ s.category # the for sale Category
31
+ s.items # will return a set of Items
32
+
33
+ Each Item has a title, url (which is the link to the item on craigslist), and
34
+ a date, which is the date the item was posted on craigslist.
35
+
36
+ === Country Search
37
+
38
+ Options:
39
+ :keyword
40
+ default: bicycle (because bicycles are awesome)
41
+ :category
42
+ default: for sale (because buying another bike is always a good idea)
43
+ :country
44
+ default: us (because that is where I live)
45
+ :interval
46
+ default: 1 second (because hammering the cl servers will make 'em angry)
47
+
48
+ Country searches are a combined set of individual location searches. Crags
49
+ imposes a 1 second delay between each location search in order not to overload
50
+ the Craigslist servers. You can change this at your own risk, but doing a lot
51
+ of country wide searches with a 0s interval will probably get your IP banned.
52
+
53
+ == Installation
54
+
55
+ gem install crags
19
56
 
20
57
  == Note on Patches/Pull Requests
21
58
 
data/Rakefile CHANGED
@@ -1,4 +1,3 @@
1
- require 'rubygems'
2
1
  require 'rake'
3
2
 
4
3
  begin
@@ -10,43 +9,34 @@ begin
10
9
  gem.email = "gotascii@gmail.com"
11
10
  gem.homepage = "http://github.com/gotascii/crags"
12
11
  gem.authors = ["Justin Marney"]
12
+ gem.add_dependency('curb')
13
+ gem.add_dependency('hpricot')
13
14
  end
14
15
  Jeweler::GemcutterTasks.new
15
16
  rescue LoadError
16
17
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
17
18
  end
18
19
 
19
- require 'rake/testtask'
20
- Rake::TestTask.new(:test) do |test|
21
- test.libs << 'lib' << 'test'
22
- test.pattern = 'test/**/*_test.rb'
23
- test.verbose = true
20
+ require 'spec/rake/spectask'
21
+ Spec::Rake::SpecTask.new(:spec) do |spec|
22
+ spec.libs << 'lib' << 'spec'
23
+ spec.spec_files = FileList['spec/**/*_spec.rb']
24
24
  end
25
25
 
26
- begin
27
- require 'rcov/rcovtask'
28
- Rcov::RcovTask.new do |test|
29
- test.libs << 'test'
30
- test.pattern = 'test/**/*_test.rb'
31
- test.verbose = true
32
- end
33
- rescue LoadError
34
- task :rcov do
35
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
36
- end
26
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
27
+ spec.libs << 'lib' << 'spec'
28
+ spec.pattern = 'spec/**/*_spec.rb'
29
+ spec.rcov = true
30
+ spec.rcov_opts = ['--exclude', 'spec,gems,crags.rb']
37
31
  end
38
32
 
39
- task :test => :check_dependencies
33
+ task :spec => :check_dependencies
40
34
 
41
- task :default => :test
35
+ task :default => :spec
42
36
 
43
37
  require 'rake/rdoctask'
44
38
  Rake::RDocTask.new do |rdoc|
45
- if File.exist?('VERSION')
46
- version = File.read('VERSION')
47
- else
48
- version = ""
49
- end
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
50
40
 
51
41
  rdoc.rdoc_dir = 'rdoc'
52
42
  rdoc.title = "crags #{version}"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.6.0
1
+ 2.0.0
data/crags.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{crags}
8
- s.version = "1.6.0"
8
+ s.version = "2.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Justin Marney"]
12
- s.date = %q{2010-04-14}
12
+ s.date = %q{2010-07-26}
13
13
  s.description = %q{A library to help search across multiple craigslist locations.}
14
14
  s.email = %q{gotascii@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -18,21 +18,34 @@ Gem::Specification.new do |s|
18
18
  ]
19
19
  s.files = [
20
20
  ".gitignore",
21
+ "Gemfile",
21
22
  "LICENSE",
22
23
  "README.rdoc",
23
24
  "Rakefile",
24
25
  "VERSION",
25
26
  "crags.gemspec",
26
27
  "lib/crags.rb",
27
- "lib/crags/fetch.rb",
28
- "lib/crags/proxy.rb",
29
- "lib/crags/runner.rb",
30
- "lib/crags/searcher.rb",
31
- "test/crags/fetch_test.rb",
32
- "test/crags/proxy_test.rb",
33
- "test/crags/runner_test.rb",
34
- "test/crags/searcher_test.rb",
35
- "test/test_helper.rb"
28
+ "lib/crags/category.rb",
29
+ "lib/crags/country.rb",
30
+ "lib/crags/fetcher.rb",
31
+ "lib/crags/item.rb",
32
+ "lib/crags/location.rb",
33
+ "lib/crags/search/country.rb",
34
+ "lib/crags/search/location.rb",
35
+ "lib/crags/search/search.rb",
36
+ "lib/ext/hpricot/elem.rb",
37
+ "lib/ext/string.rb",
38
+ "spec/crags/category_spec.rb",
39
+ "spec/crags/country_spec.rb",
40
+ "spec/crags/fetcher_spec.rb",
41
+ "spec/crags/item_spec.rb",
42
+ "spec/crags/location_spec.rb",
43
+ "spec/crags/search/country_spec.rb",
44
+ "spec/crags/search/location_spec.rb",
45
+ "spec/crags/search/search_spec.rb",
46
+ "spec/ext/hpricot/elem_spec.rb",
47
+ "spec/ext/string_spec.rb",
48
+ "spec/spec_helper.rb"
36
49
  ]
37
50
  s.homepage = %q{http://github.com/gotascii/crags}
38
51
  s.rdoc_options = ["--charset=UTF-8"]
@@ -40,11 +53,17 @@ Gem::Specification.new do |s|
40
53
  s.rubygems_version = %q{1.3.6}
41
54
  s.summary = %q{A library to help search across multiple craigslist locations.}
42
55
  s.test_files = [
43
- "test/crags/fetch_test.rb",
44
- "test/crags/proxy_test.rb",
45
- "test/crags/runner_test.rb",
46
- "test/crags/searcher_test.rb",
47
- "test/test_helper.rb"
56
+ "spec/crags/category_spec.rb",
57
+ "spec/crags/country_spec.rb",
58
+ "spec/crags/fetcher_spec.rb",
59
+ "spec/crags/item_spec.rb",
60
+ "spec/crags/location_spec.rb",
61
+ "spec/crags/search/country_spec.rb",
62
+ "spec/crags/search/location_spec.rb",
63
+ "spec/crags/search/search_spec.rb",
64
+ "spec/ext/hpricot/elem_spec.rb",
65
+ "spec/ext/string_spec.rb",
66
+ "spec/spec_helper.rb"
48
67
  ]
49
68
 
50
69
  if s.respond_to? :specification_version then
@@ -52,9 +71,15 @@ Gem::Specification.new do |s|
52
71
  s.specification_version = 3
53
72
 
54
73
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
74
+ s.add_runtime_dependency(%q<curb>, [">= 0"])
75
+ s.add_runtime_dependency(%q<hpricot>, [">= 0"])
55
76
  else
77
+ s.add_dependency(%q<curb>, [">= 0"])
78
+ s.add_dependency(%q<hpricot>, [">= 0"])
56
79
  end
57
80
  else
81
+ s.add_dependency(%q<curb>, [">= 0"])
82
+ s.add_dependency(%q<hpricot>, [">= 0"])
58
83
  end
59
84
  end
60
85
 
@@ -0,0 +1,29 @@
1
+ module Crags
2
+ class Category
3
+ extend Fetcher
4
+ attr_reader :name, :url
5
+
6
+ def initialize(name, abbr)
7
+ @name = name
8
+ @url = "/#{abbr}"
9
+ end
10
+
11
+ def self.doc
12
+ fetch_doc(Config.category_url)
13
+ end
14
+
15
+ def self.links
16
+ doc.search("div.col a").select do |link|
17
+ (link["href"] =~ /forum/).nil?
18
+ end
19
+ end
20
+
21
+ def self.all
22
+ links.collect do |link|
23
+ url = link["href"]
24
+ name = link.inner_html
25
+ Category.new(name, url)
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,37 @@
1
+ module Crags
2
+ class Country
3
+ include Fetcher
4
+ attr_reader :code
5
+
6
+ def initialize(code)
7
+ @code = code
8
+ end
9
+
10
+ def url
11
+ "#{Config.country_url}/#{code}"
12
+ end
13
+
14
+ def doc
15
+ fetch_doc(url)
16
+ end
17
+
18
+ def request
19
+ fetch_request(url)
20
+ end
21
+
22
+ def links
23
+ doc.search("#list a")
24
+ end
25
+
26
+ def locations
27
+ if links.empty?
28
+ str = request.last_effective_url
29
+ [Location.new(str.strip_http)]
30
+ else
31
+ links.collect do |link|
32
+ Location.new(link["href"].strip_http)
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -1,5 +1,5 @@
1
1
  module Crags
2
- module Fetch
2
+ module Fetcher
3
3
  def fetch_doc(url)
4
4
  Hpricot.parse(fetch_html(url))
5
5
  end
data/lib/crags/item.rb ADDED
@@ -0,0 +1,11 @@
1
+ module Crags
2
+ class Item
3
+ attr_reader :title, :url, :date
4
+
5
+ def initialize(elem)
6
+ @title = elem.title
7
+ @url = elem.url
8
+ @date = elem.date
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,13 @@
1
+ module Crags
2
+ class Location
3
+ attr_reader :domain
4
+
5
+ def initialize(domain)
6
+ @domain = domain
7
+ end
8
+
9
+ def url
10
+ "http://#{domain}"
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,26 @@
1
+ module Crags
2
+ module Search
3
+ class Country < Search
4
+ attr_reader :country, :interval
5
+
6
+ def initialize(opts = {})
7
+ super
8
+ @country = @opts[:country]
9
+ @interval = @opts[:interval]
10
+ end
11
+
12
+ def locations
13
+ country.locations
14
+ end
15
+
16
+ def items
17
+ combined_items = locations.collect do |loc|
18
+ sleep(interval)
19
+ search = Location.new(opts.merge(:location => loc))
20
+ search.items
21
+ end
22
+ combined_items.flatten
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,28 @@
1
+ module Crags
2
+ module Search
3
+ class Location < Search
4
+ include ERB::Util
5
+ include Fetcher
6
+ attr_reader :location
7
+
8
+ def initialize(opts = {})
9
+ super(opts)
10
+ @location = @opts[:location]
11
+ end
12
+
13
+ def url
14
+ "#{location.url}/search#{category.url}?query=#{url_encode(keyword)}"
15
+ end
16
+
17
+ def doc
18
+ fetch_doc("#{url}&format=rss")
19
+ end
20
+
21
+ def items
22
+ doc.search("item").collect do |elem|
23
+ Item.new(elem)
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,13 @@
1
+ module Crags
2
+ module Search
3
+ class Search
4
+ attr_reader :keyword, :category, :opts
5
+
6
+ def initialize(opts = {})
7
+ @opts = Config.defaults.merge(opts)
8
+ @keyword = @opts[:keyword]
9
+ @category = @opts[:category]
10
+ end
11
+ end
12
+ end
13
+ end
data/lib/crags.rb CHANGED
@@ -1,42 +1,29 @@
1
- require 'curb'
2
- require 'hpricot'
1
+ require 'bundler'
2
+ Bundler.require
3
3
  require 'erb'
4
+ require 'ostruct'
5
+ require 'ext/string'
6
+ require 'ext/hpricot/elem'
7
+ require 'crags/fetcher'
8
+ require 'crags/country'
9
+ require 'crags/location'
10
+ require 'crags/category'
11
+ require 'crags/item'
12
+ require 'crags/search/search'
13
+ require 'crags/search/location'
14
+ require 'crags/search/country'
4
15
 
5
16
  module Crags
6
- COUNTRIES = [
7
- 'jp',
8
- 'ar',
9
- 'bd',
10
- 'br',
11
- 'ca',
12
- 'cl',
13
- 'co',
14
- 'cr',
15
- 'cz',
16
- 'de',
17
- 'eg',
18
- 'gb',
19
- 'hu',
20
- 'id',
21
- 'ie',
22
- 'il',
23
- 'lb',
24
- 'my',
25
- 'nl',
26
- 'nz',
27
- 'no',
28
- 'pk',
29
- 'pa',
30
- 'ru',
31
- 'th',
32
- 'ae',
33
- 'us',
34
- 've',
35
- 'vn'
36
- ]
37
- end
38
-
39
- require 'crags/fetch'
40
- require 'crags/proxy'
41
- require 'crags/searcher'
42
- require 'crags/runner'
17
+ Config = OpenStruct.new({
18
+ :defaults => {
19
+ :keyword => 'bicycle',
20
+ :category => Category.new('for sale', 'sss'),
21
+ :country => Country.new('us'),
22
+ :location => Location.new('sfbay.craigslist.org'),
23
+ :interval => 1
24
+ },
25
+ :category_url => "http://sfbay.craigslist.org/",
26
+ :country_url => "http://geo.craigslist.org/iso",
27
+ :country_codes => ['jp', 'ar', 'bd', 'br', 'ca', 'cl', 'co', 'cr', 'cz', 'de', 'eg', 'gb', 'hu', 'id', 'ie', 'il', 'lb', 'my', 'nl', 'nz', 'no', 'pk', 'pa', 'ru', 'th', 'ae', 'us', 've', 'vn']
28
+ })
29
+ end
@@ -0,0 +1,19 @@
1
+ module Hpricot
2
+ class Elem
3
+ def title
4
+ at("title").inner_text
5
+ end
6
+
7
+ def url
8
+ self["rdf:about"].strip_http
9
+ end
10
+
11
+ def date_str
12
+ at("dc:date").inner_text
13
+ end
14
+
15
+ def date
16
+ DateTime.parse(date_str)
17
+ end
18
+ end
19
+ end
data/lib/ext/string.rb ADDED
@@ -0,0 +1,5 @@
1
+ class String
2
+ def strip_http
3
+ self.gsub(/^http\:\/\//,'').gsub(/\/$/,'')
4
+ end
5
+ end
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+
3
+ describe Category, "the class" do
4
+ it "fetches the doc at the configured category url" do
5
+ Crags::Config.stub!(:category_url).and_return("category_url")
6
+ Category.should_receive(:fetch_doc).with("category_url").and_return("doc")
7
+ Category.doc.should == 'doc'
8
+ end
9
+
10
+ describe "with a doc" do
11
+ before do
12
+ @links = [{'href' => 'whoa'}]
13
+ @doc = stub
14
+ Category.stub!(:doc).and_return(@doc)
15
+ end
16
+
17
+ it "gets all of the category links from the doc" do
18
+ @doc.should_receive(:search).with("div.col a").and_return(@links)
19
+ Category.links.should == @links
20
+ end
21
+
22
+ it "doesn't return links to forums" do
23
+ @doc.stub!(:search).and_return(@links)
24
+ @links.first['href'] = 'omg/forum/dude'
25
+ Category.links.should == []
26
+ end
27
+ end
28
+
29
+ it "creates an array of new Categories based on the links" do
30
+ links = []
31
+ 2.times do
32
+ link = {'href' => 'link_href'}
33
+ link.stub!(:inner_html).and_return('link_inner_html')
34
+ links << link
35
+ end
36
+ Category.stub!(:links).and_return(links)
37
+ Category.should_receive(:new).with('link_inner_html', 'link_href').twice.and_return('category!')
38
+ Category.all.should == ['category!', 'category!']
39
+ end
40
+ end
41
+
42
+ describe Category do
43
+ before do
44
+ @cat = Category.new('name', 'abbr')
45
+ end
46
+
47
+ it "has a name reader" do
48
+ @cat.name.should == 'name'
49
+ end
50
+
51
+ it "has a url reader" do
52
+ @cat.url.should == '/abbr'
53
+ end
54
+ end
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
3
+ describe Country do
4
+ before do
5
+ @country = Country.new("us")
6
+ end
7
+
8
+ it "generates the url for a country" do
9
+ Crags::Config.stub!(:country_url).and_return("country_url")
10
+ @country.url.should == "country_url/us"
11
+ end
12
+
13
+ it "is initialized with a country code" do
14
+ @country.code.should == "us"
15
+ end
16
+
17
+ it "fetches the doc for the country" do
18
+ @country.stub!(:url).and_return("url")
19
+ @country.should_receive(:fetch_doc).with("url").and_return("doc")
20
+ @country.doc.should == "doc"
21
+ end
22
+
23
+ it "fetches the request for the country" do
24
+ @country.stub!(:url).and_return("url")
25
+ @country.should_receive(:fetch_request).with("url").and_return("request")
26
+ @country.request.should == "request"
27
+ end
28
+
29
+ it "gets all a tags from div with id list" do
30
+ doc = mock
31
+ doc.should_receive(:search).with("#list a").and_return("links")
32
+ @country.stub!(:doc).and_return(doc)
33
+ @country.links.should == "links"
34
+ end
35
+
36
+ it "generates an array of urls using link hrefs" do
37
+ links = []
38
+ 2.times do |i|
39
+ link = mock
40
+ link.should_receive(:[]).with("href").and_return("http://url#{i}/")
41
+ links << link
42
+ end
43
+ @country.stub!(:links).and_return(links)
44
+ Location.should_receive(:new).with("url0").and_return("url0")
45
+ Location.should_receive(:new).with("url1").and_return("url1")
46
+ @country.locations.should == ["url0", "url1"]
47
+ end
48
+
49
+ it "generates an array containing one url using location_urls last_effective_url when no links are present" do
50
+ @country.stub!(:links).and_return([])
51
+ req = mock
52
+ req.should_receive(:last_effective_url).and_return('http://url.org/')
53
+ @country.stub!(:request).and_return(req)
54
+ Location.should_receive(:new).with("url.org").and_return("url.org")
55
+ @country.locations.should == ["url.org"]
56
+ end
57
+ end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe Fetcher do
4
+ before do
5
+ extend Fetcher
6
+ end
7
+
8
+ it "hpricots the fetched html" do
9
+ stub!(:fetch_html).with("url").and_return("html")
10
+ Hpricot.should_receive(:parse).with("html").and_return("doc")
11
+ fetch_doc("url").should == "doc"
12
+ end
13
+
14
+ it "fetch_requests a url" do
15
+ curb = mock("curb")
16
+ curb.stub!(:body_str).and_return("uhh")
17
+ should_receive(:fetch_request).with("url").and_return(curb)
18
+ fetch_html("url").should == "uhh"
19
+ end
20
+
21
+ it "creates a new request" do
22
+ req = mock("req")
23
+ req.stub!(:follow_location=)
24
+ req.stub!(:perform)
25
+ Curl::Easy.should_receive(:new).with("url").and_return(req)
26
+ fetch_request("url").should == req
27
+ end
28
+
29
+ it "follows redirects for fetched requests" do
30
+ req = mock("req")
31
+ req.should_receive(:follow_location=)
32
+ req.should_receive(:perform)
33
+ Curl::Easy.stub!(:new).and_return(req)
34
+ fetch_request("url")
35
+ end
36
+ end