crags 1.6.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,2 +1,4 @@
1
1
  pkg/
2
- .DS_Store
2
+ .DS_Store
3
+ .bundle
4
+ coverage
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem "curb"
4
+ gem "hpricot"
5
+
6
+ group :test do
7
+ gem "rspec"
8
+ end
data/README.rdoc CHANGED
@@ -4,18 +4,55 @@ A library to help search across multiple craigslist locations.
4
4
 
5
5
  == Usage
6
6
 
7
- searches all available craigslist sites for a keyword in "for sale".
8
- search(keyword)
7
+ There are two ways to search craigslist using crags. You can search by keyword
8
+ in a single location scoped to a category. You can also search by keyword
9
+ across a country scoped by category. Each search type also has a set of
10
+ configurable options.
9
11
 
10
- search also takes a category
11
- search(keyword, 'sss')
12
+ === Single Location Search
12
13
 
13
- There are a bunch of helpful api methods, check the tests for more info.
14
+ Options:
15
+ :keyword
16
+ default: bicycle (because bicycles are awesome)
17
+ :location
18
+ default: sfbay.craigslist.org (because that is where it all began)
19
+ :category
20
+ default: for sale (because buying another bike is always a good idea)
14
21
 
15
- == Requirements
22
+ Locations and Categories are full-on Ruby objects. You can get all of the
23
+ Categories using:
24
+ Category.all
16
25
 
17
- * hpricot
18
- * curb
26
+ Search a single craigslist location (e.g. sfbay.craigslist.org)
27
+ s = Search::Location.new(:keyword => 'shoes')
28
+ s.keyword # this will be 'shoes' instead of the default
29
+ s.location # this will be the sfbay Location
30
+ s.category # the for sale Category
31
+ s.items # will return a set of Items
32
+
33
+ Each Item has a title, url (which is the link to the item on craigslist), and
34
+ a date, which is the date the item was posted on craigslist.
35
+
36
+ === Country Search
37
+
38
+ Options:
39
+ :keyword
40
+ default: bicycle (because bicycles are awesome)
41
+ :category
42
+ default: for sale (because buying another bike is always a good idea)
43
+ :country
44
+ default: us (because that is where I live)
45
+ :interval
46
+ default: 1 second (because hammering the cl servers will make 'em angry)
47
+
48
+ Country searches are a combined set of individual location searches. Crags
49
+ imposes a 1 second delay between each location search in order not to overload
50
+ the Craigslist servers. You can change this at your own risk, but doing a lot
51
+ of country-wide searches with a 0s interval will probably get your IP banned.
52
+
53
+ == Installation
54
+
55
+ gem install crags
19
56
 
20
57
  == Note on Patches/Pull Requests
21
58
 
data/Rakefile CHANGED
@@ -1,4 +1,3 @@
1
- require 'rubygems'
2
1
  require 'rake'
3
2
 
4
3
  begin
@@ -10,43 +9,34 @@ begin
10
9
  gem.email = "gotascii@gmail.com"
11
10
  gem.homepage = "http://github.com/gotascii/crags"
12
11
  gem.authors = ["Justin Marney"]
12
+ gem.add_dependency('curb')
13
+ gem.add_dependency('hpricot')
13
14
  end
14
15
  Jeweler::GemcutterTasks.new
15
16
  rescue LoadError
16
17
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
17
18
  end
18
19
 
19
- require 'rake/testtask'
20
- Rake::TestTask.new(:test) do |test|
21
- test.libs << 'lib' << 'test'
22
- test.pattern = 'test/**/*_test.rb'
23
- test.verbose = true
20
+ require 'spec/rake/spectask'
21
+ Spec::Rake::SpecTask.new(:spec) do |spec|
22
+ spec.libs << 'lib' << 'spec'
23
+ spec.spec_files = FileList['spec/**/*_spec.rb']
24
24
  end
25
25
 
26
- begin
27
- require 'rcov/rcovtask'
28
- Rcov::RcovTask.new do |test|
29
- test.libs << 'test'
30
- test.pattern = 'test/**/*_test.rb'
31
- test.verbose = true
32
- end
33
- rescue LoadError
34
- task :rcov do
35
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
36
- end
26
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
27
+ spec.libs << 'lib' << 'spec'
28
+ spec.pattern = 'spec/**/*_spec.rb'
29
+ spec.rcov = true
30
+ spec.rcov_opts = ['--exclude', 'spec,gems,crags.rb']
37
31
  end
38
32
 
39
- task :test => :check_dependencies
33
+ task :spec => :check_dependencies
40
34
 
41
- task :default => :test
35
+ task :default => :spec
42
36
 
43
37
  require 'rake/rdoctask'
44
38
  Rake::RDocTask.new do |rdoc|
45
- if File.exist?('VERSION')
46
- version = File.read('VERSION')
47
- else
48
- version = ""
49
- end
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
50
40
 
51
41
  rdoc.rdoc_dir = 'rdoc'
52
42
  rdoc.title = "crags #{version}"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.6.0
1
+ 2.0.0
data/crags.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{crags}
8
- s.version = "1.6.0"
8
+ s.version = "2.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Justin Marney"]
12
- s.date = %q{2010-04-14}
12
+ s.date = %q{2010-07-26}
13
13
  s.description = %q{A library to help search across multiple craigslist locations.}
14
14
  s.email = %q{gotascii@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -18,21 +18,34 @@ Gem::Specification.new do |s|
18
18
  ]
19
19
  s.files = [
20
20
  ".gitignore",
21
+ "Gemfile",
21
22
  "LICENSE",
22
23
  "README.rdoc",
23
24
  "Rakefile",
24
25
  "VERSION",
25
26
  "crags.gemspec",
26
27
  "lib/crags.rb",
27
- "lib/crags/fetch.rb",
28
- "lib/crags/proxy.rb",
29
- "lib/crags/runner.rb",
30
- "lib/crags/searcher.rb",
31
- "test/crags/fetch_test.rb",
32
- "test/crags/proxy_test.rb",
33
- "test/crags/runner_test.rb",
34
- "test/crags/searcher_test.rb",
35
- "test/test_helper.rb"
28
+ "lib/crags/category.rb",
29
+ "lib/crags/country.rb",
30
+ "lib/crags/fetcher.rb",
31
+ "lib/crags/item.rb",
32
+ "lib/crags/location.rb",
33
+ "lib/crags/search/country.rb",
34
+ "lib/crags/search/location.rb",
35
+ "lib/crags/search/search.rb",
36
+ "lib/ext/hpricot/elem.rb",
37
+ "lib/ext/string.rb",
38
+ "spec/crags/category_spec.rb",
39
+ "spec/crags/country_spec.rb",
40
+ "spec/crags/fetcher_spec.rb",
41
+ "spec/crags/item_spec.rb",
42
+ "spec/crags/location_spec.rb",
43
+ "spec/crags/search/country_spec.rb",
44
+ "spec/crags/search/location_spec.rb",
45
+ "spec/crags/search/search_spec.rb",
46
+ "spec/ext/hpricot/elem_spec.rb",
47
+ "spec/ext/string_spec.rb",
48
+ "spec/spec_helper.rb"
36
49
  ]
37
50
  s.homepage = %q{http://github.com/gotascii/crags}
38
51
  s.rdoc_options = ["--charset=UTF-8"]
@@ -40,11 +53,17 @@ Gem::Specification.new do |s|
40
53
  s.rubygems_version = %q{1.3.6}
41
54
  s.summary = %q{A library to help search across multiple craigslist locations.}
42
55
  s.test_files = [
43
- "test/crags/fetch_test.rb",
44
- "test/crags/proxy_test.rb",
45
- "test/crags/runner_test.rb",
46
- "test/crags/searcher_test.rb",
47
- "test/test_helper.rb"
56
+ "spec/crags/category_spec.rb",
57
+ "spec/crags/country_spec.rb",
58
+ "spec/crags/fetcher_spec.rb",
59
+ "spec/crags/item_spec.rb",
60
+ "spec/crags/location_spec.rb",
61
+ "spec/crags/search/country_spec.rb",
62
+ "spec/crags/search/location_spec.rb",
63
+ "spec/crags/search/search_spec.rb",
64
+ "spec/ext/hpricot/elem_spec.rb",
65
+ "spec/ext/string_spec.rb",
66
+ "spec/spec_helper.rb"
48
67
  ]
49
68
 
50
69
  if s.respond_to? :specification_version then
@@ -52,9 +71,15 @@ Gem::Specification.new do |s|
52
71
  s.specification_version = 3
53
72
 
54
73
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
74
+ s.add_runtime_dependency(%q<curb>, [">= 0"])
75
+ s.add_runtime_dependency(%q<hpricot>, [">= 0"])
55
76
  else
77
+ s.add_dependency(%q<curb>, [">= 0"])
78
+ s.add_dependency(%q<hpricot>, [">= 0"])
56
79
  end
57
80
  else
81
+ s.add_dependency(%q<curb>, [">= 0"])
82
+ s.add_dependency(%q<hpricot>, [">= 0"])
58
83
  end
59
84
  end
60
85
 
@@ -0,0 +1,29 @@
1
+ module Crags
2
+ class Category
3
+ extend Fetcher
4
+ attr_reader :name, :url
5
+
6
+ def initialize(name, abbr)
7
+ @name = name
8
+ @url = "/#{abbr}"
9
+ end
10
+
11
+ def self.doc
12
+ fetch_doc(Config.category_url)
13
+ end
14
+
15
+ def self.links
16
+ doc.search("div.col a").select do |link|
17
+ (link["href"] =~ /forum/).nil?
18
+ end
19
+ end
20
+
21
+ def self.all
22
+ links.collect do |link|
23
+ url = link["href"]
24
+ name = link.inner_html
25
+ Category.new(name, url)
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,37 @@
1
+ module Crags
2
+ class Country
3
+ include Fetcher
4
+ attr_reader :code
5
+
6
+ def initialize(code)
7
+ @code = code
8
+ end
9
+
10
+ def url
11
+ "#{Config.country_url}/#{code}"
12
+ end
13
+
14
+ def doc
15
+ fetch_doc(url)
16
+ end
17
+
18
+ def request
19
+ fetch_request(url)
20
+ end
21
+
22
+ def links
23
+ doc.search("#list a")
24
+ end
25
+
26
+ def locations
27
+ if links.empty?
28
+ str = request.last_effective_url
29
+ [Location.new(str.strip_http)]
30
+ else
31
+ links.collect do |link|
32
+ Location.new(link["href"].strip_http)
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -1,5 +1,5 @@
1
1
  module Crags
2
- module Fetch
2
+ module Fetcher
3
3
  def fetch_doc(url)
4
4
  Hpricot.parse(fetch_html(url))
5
5
  end
data/lib/crags/item.rb ADDED
@@ -0,0 +1,11 @@
1
+ module Crags
2
+ class Item
3
+ attr_reader :title, :url, :date
4
+
5
+ def initialize(elem)
6
+ @title = elem.title
7
+ @url = elem.url
8
+ @date = elem.date
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,13 @@
1
+ module Crags
2
+ class Location
3
+ attr_reader :domain
4
+
5
+ def initialize(domain)
6
+ @domain = domain
7
+ end
8
+
9
+ def url
10
+ "http://#{domain}"
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,26 @@
1
+ module Crags
2
+ module Search
3
+ class Country < Search
4
+ attr_reader :country, :interval
5
+
6
+ def initialize(opts = {})
7
+ super
8
+ @country = @opts[:country]
9
+ @interval = @opts[:interval]
10
+ end
11
+
12
+ def locations
13
+ country.locations
14
+ end
15
+
16
+ def items
17
+ combined_items = locations.collect do |loc|
18
+ sleep(interval)
19
+ search = Location.new(opts.merge(:location => loc))
20
+ search.items
21
+ end
22
+ combined_items.flatten
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,28 @@
1
+ module Crags
2
+ module Search
3
+ class Location < Search
4
+ include ERB::Util
5
+ include Fetcher
6
+ attr_reader :location
7
+
8
+ def initialize(opts = {})
9
+ super(opts)
10
+ @location = @opts[:location]
11
+ end
12
+
13
+ def url
14
+ "#{location.url}/search#{category.url}?query=#{url_encode(keyword)}"
15
+ end
16
+
17
+ def doc
18
+ fetch_doc("#{url}&format=rss")
19
+ end
20
+
21
+ def items
22
+ doc.search("item").collect do |elem|
23
+ Item.new(elem)
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,13 @@
1
+ module Crags
2
+ module Search
3
+ class Search
4
+ attr_reader :keyword, :category, :opts
5
+
6
+ def initialize(opts = {})
7
+ @opts = Config.defaults.merge(opts)
8
+ @keyword = @opts[:keyword]
9
+ @category = @opts[:category]
10
+ end
11
+ end
12
+ end
13
+ end
data/lib/crags.rb CHANGED
@@ -1,42 +1,29 @@
1
- require 'curb'
2
- require 'hpricot'
1
+ require 'bundler'
2
+ Bundler.require
3
3
  require 'erb'
4
+ require 'ostruct'
5
+ require 'ext/string'
6
+ require 'ext/hpricot/elem'
7
+ require 'crags/fetcher'
8
+ require 'crags/country'
9
+ require 'crags/location'
10
+ require 'crags/category'
11
+ require 'crags/item'
12
+ require 'crags/search/search'
13
+ require 'crags/search/location'
14
+ require 'crags/search/country'
4
15
 
5
16
  module Crags
6
- COUNTRIES = [
7
- 'jp',
8
- 'ar',
9
- 'bd',
10
- 'br',
11
- 'ca',
12
- 'cl',
13
- 'co',
14
- 'cr',
15
- 'cz',
16
- 'de',
17
- 'eg',
18
- 'gb',
19
- 'hu',
20
- 'id',
21
- 'ie',
22
- 'il',
23
- 'lb',
24
- 'my',
25
- 'nl',
26
- 'nz',
27
- 'no',
28
- 'pk',
29
- 'pa',
30
- 'ru',
31
- 'th',
32
- 'ae',
33
- 'us',
34
- 've',
35
- 'vn'
36
- ]
37
- end
38
-
39
- require 'crags/fetch'
40
- require 'crags/proxy'
41
- require 'crags/searcher'
42
- require 'crags/runner'
17
+ Config = OpenStruct.new({
18
+ :defaults => {
19
+ :keyword => 'bicycle',
20
+ :category => Category.new('for sale', 'sss'),
21
+ :country => Country.new('us'),
22
+ :location => Location.new('sfbay.craigslist.org'),
23
+ :interval => 1
24
+ },
25
+ :category_url => "http://sfbay.craigslist.org/",
26
+ :country_url => "http://geo.craigslist.org/iso",
27
+ :country_codes => ['jp', 'ar', 'bd', 'br', 'ca', 'cl', 'co', 'cr', 'cz', 'de', 'eg', 'gb', 'hu', 'id', 'ie', 'il', 'lb', 'my', 'nl', 'nz', 'no', 'pk', 'pa', 'ru', 'th', 'ae', 'us', 've', 'vn']
28
+ })
29
+ end
@@ -0,0 +1,19 @@
1
+ module Hpricot
2
+ class Elem
3
+ def title
4
+ at("title").inner_text
5
+ end
6
+
7
+ def url
8
+ self["rdf:about"].strip_http
9
+ end
10
+
11
+ def date_str
12
+ at("dc:date").inner_text
13
+ end
14
+
15
+ def date
16
+ DateTime.parse(date_str)
17
+ end
18
+ end
19
+ end
data/lib/ext/string.rb ADDED
@@ -0,0 +1,5 @@
1
+ class String
2
+ def strip_http
3
+ self.gsub(/^http\:\/\//,'').gsub(/\/$/,'')
4
+ end
5
+ end
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+
3
+ describe Category, "the class" do
4
+ it "fetches the doc at the configured category url" do
5
+ Crags::Config.stub!(:category_url).and_return("category_url")
6
+ Category.should_receive(:fetch_doc).with("category_url").and_return("doc")
7
+ Category.doc.should == 'doc'
8
+ end
9
+
10
+ describe "with a doc" do
11
+ before do
12
+ @links = [{'href' => 'whoa'}]
13
+ @doc = stub
14
+ Category.stub!(:doc).and_return(@doc)
15
+ end
16
+
17
+ it "gets all of the category links from the doc" do
18
+ @doc.should_receive(:search).with("div.col a").and_return(@links)
19
+ Category.links.should == @links
20
+ end
21
+
22
+ it "doesn't return links to forums" do
23
+ @doc.stub!(:search).and_return(@links)
24
+ @links.first['href'] = 'omg/forum/dude'
25
+ Category.links.should == []
26
+ end
27
+ end
28
+
29
+ it "creates an array of new Categories based on the links" do
30
+ links = []
31
+ 2.times do
32
+ link = {'href' => 'link_href'}
33
+ link.stub!(:inner_html).and_return('link_inner_html')
34
+ links << link
35
+ end
36
+ Category.stub!(:links).and_return(links)
37
+ Category.should_receive(:new).with('link_inner_html', 'link_href').twice.and_return('category!')
38
+ Category.all.should == ['category!', 'category!']
39
+ end
40
+ end
41
+
42
+ describe Category do
43
+ before do
44
+ @cat = Category.new('name', 'abbr')
45
+ end
46
+
47
+ it "has a name reader" do
48
+ @cat.name.should == 'name'
49
+ end
50
+
51
+ it "has a url reader" do
52
+ @cat.url.should == '/abbr'
53
+ end
54
+ end
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
3
+ describe Country do
4
+ before do
5
+ @country = Country.new("us")
6
+ end
7
+
8
+ it "generates the url for a country" do
9
+ Crags::Config.stub!(:country_url).and_return("country_url")
10
+ @country.url.should == "country_url/us"
11
+ end
12
+
13
+ it "is initialized with a country code" do
14
+ @country.code.should == "us"
15
+ end
16
+
17
+ it "fetches the doc for the country" do
18
+ @country.stub!(:url).and_return("url")
19
+ @country.should_receive(:fetch_doc).with("url").and_return("doc")
20
+ @country.doc.should == "doc"
21
+ end
22
+
23
+ it "fetches the request for the country" do
24
+ @country.stub!(:url).and_return("url")
25
+ @country.should_receive(:fetch_request).with("url").and_return("request")
26
+ @country.request.should == "request"
27
+ end
28
+
29
+ it "gets all a tags from div with id list" do
30
+ doc = mock
31
+ doc.should_receive(:search).with("#list a").and_return("links")
32
+ @country.stub!(:doc).and_return(doc)
33
+ @country.links.should == "links"
34
+ end
35
+
36
+ it "generates an array of urls using link hrefs" do
37
+ links = []
38
+ 2.times do |i|
39
+ link = mock
40
+ link.should_receive(:[]).with("href").and_return("http://url#{i}/")
41
+ links << link
42
+ end
43
+ @country.stub!(:links).and_return(links)
44
+ Location.should_receive(:new).with("url0").and_return("url0")
45
+ Location.should_receive(:new).with("url1").and_return("url1")
46
+ @country.locations.should == ["url0", "url1"]
47
+ end
48
+
49
+ it "generates an array containing one url using location_urls last_effective_url when no links are present" do
50
+ @country.stub!(:links).and_return([])
51
+ req = mock
52
+ req.should_receive(:last_effective_url).and_return('http://url.org/')
53
+ @country.stub!(:request).and_return(req)
54
+ Location.should_receive(:new).with("url.org").and_return("url.org")
55
+ @country.locations.should == ["url.org"]
56
+ end
57
+ end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe Fetcher do
4
+ before do
5
+ extend Fetcher
6
+ end
7
+
8
+ it "hpricots the fetched html" do
9
+ stub!(:fetch_html).with("url").and_return("html")
10
+ Hpricot.should_receive(:parse).with("html").and_return("doc")
11
+ fetch_doc("url").should == "doc"
12
+ end
13
+
14
+ it "fetch_requests a url" do
15
+ curb = mock("curb")
16
+ curb.stub!(:body_str).and_return("uhh")
17
+ should_receive(:fetch_request).with("url").and_return(curb)
18
+ fetch_html("url").should == "uhh"
19
+ end
20
+
21
+ it "creates a new request" do
22
+ req = mock("req")
23
+ req.stub!(:follow_location=)
24
+ req.stub!(:perform)
25
+ Curl::Easy.should_receive(:new).with("url").and_return(req)
26
+ fetch_request("url").should == req
27
+ end
28
+
29
+ it "follows redirects for fetched requests" do
30
+ req = mock("req")
31
+ req.should_receive(:follow_location=)
32
+ req.should_receive(:perform)
33
+ Curl::Easy.stub!(:new).and_return(req)
34
+ fetch_request("url")
35
+ end
36
+ end