crags 1.5.9

data/.gitignore ADDED
@@ -0,0 +1,2 @@
+ pkg/
+ .DS_Store
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2009 Justin Marney
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,33 @@
+ = crags
+
+ A library to help search across multiple craigslist locations.
+
+ == Usage
+
+ Searches all available craigslist sites for a keyword in the "for sale" section:
+
+   search(keyword)
+
+ search also takes a category:
+
+   search(keyword, 'sss')
+
+ There are a number of helpful API methods; check the tests for more info.
+
+ == Requirements
+
+ * hpricot
+ * curb
+
+ == Note on Patches/Pull Requests
+
+ * Fork the project.
+ * Make your feature addition or bug fix.
+ * Add tests for it. This is important so I don't break it in a
+   future version unintentionally.
+ * Commit, but do not mess with the Rakefile, VERSION, or history.
+   (If you want to have your own version, that is fine, but bump the
+   version in a commit by itself so I can ignore it when I pull.)
+ * Send me a pull request. Bonus points for topic branches.
+
+ == Copyright
+
+ Copyright (c) 2009 Justin Marney. See LICENSE for details.
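A fuller sketch of the usage above (the keyword is hypothetical; the gem and its hpricot and curb requirements are assumed installed). search returns the :title/:url/:date hashes built by Crags::Searcher#hashify in lib/crags/searcher.rb below:

  require 'rubygems'
  require 'crags'

  include Crags::Searcher

  # Search every US craigslist site for "bicycle" under "for sale" (sss).
  results = search('bicycle', 'us', 'sss')
  results.each { |item| puts "#{item[:date]} #{item[:title]} #{item[:url]}" }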
data/Rakefile ADDED
@@ -0,0 +1,55 @@
+ require 'rubygems'
+ require 'rake'
+
+ begin
+   require 'jeweler'
+   Jeweler::Tasks.new do |gem|
+     gem.name = "crags"
+     gem.summary = %Q{A library to help search across multiple craigslist locations.}
+     gem.description = %Q{A library to help search across multiple craigslist locations.}
+     gem.email = "gotascii@gmail.com"
+     gem.homepage = "http://github.com/gotascii/crags"
+     gem.authors = ["Justin Marney"]
+   end
+   Jeweler::GemcutterTasks.new
+ rescue LoadError
+   puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
+ end
+
+ require 'rake/testtask'
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.pattern = 'test/**/*_test.rb'
+   test.verbose = true
+ end
+
+ begin
+   require 'rcov/rcovtask'
+   Rcov::RcovTask.new do |test|
+     test.libs << 'test'
+     test.pattern = 'test/**/*_test.rb'
+     test.verbose = true
+   end
+ rescue LoadError
+   task :rcov do
+     abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+   end
+ end
+
+ task :test => :check_dependencies
+
+ task :default => :test
+
+ require 'rake/rdoctask'
+ Rake::RDocTask.new do |rdoc|
+   if File.exist?('VERSION')
+     version = File.read('VERSION')
+   else
+     version = ""
+   end
+
+   rdoc.rdoc_dir = 'rdoc'
+   rdoc.title = "crags #{version}"
+   rdoc.rdoc_files.include('README*')
+   rdoc.rdoc_files.include('lib/**/*.rb')
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
+ 1.5.9
data/crags.gemspec ADDED
@@ -0,0 +1,59 @@
+ # Generated by jeweler
+ # DO NOT EDIT THIS FILE
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
+ # -*- encoding: utf-8 -*-
+
+ Gem::Specification.new do |s|
+   s.name = %q{crags}
+   s.version = "1.5.9"
+
+   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+   s.authors = ["Justin Marney"]
+   s.date = %q{2009-10-13}
+   s.description = %q{A library to help search across multiple craigslist locations.}
+   s.email = %q{gotascii@gmail.com}
+   s.extra_rdoc_files = [
+     "LICENSE",
+     "README.rdoc"
+   ]
+   s.files = [
+     ".gitignore",
+     "LICENSE",
+     "README.rdoc",
+     "Rakefile",
+     "VERSION",
+     "crags.gemspec",
+     "lib/crags.rb",
+     "lib/crags/fetch.rb",
+     "lib/crags/proxy.rb",
+     "lib/crags/runner.rb",
+     "lib/crags/searcher.rb",
+     "test/crags/fetch_test.rb",
+     "test/crags/proxy_test.rb",
+     "test/crags/runner_test.rb",
+     "test/crags/searcher_test.rb",
+     "test/test_helper.rb"
+   ]
+   s.homepage = %q{http://github.com/gotascii/crags}
+   s.rdoc_options = ["--charset=UTF-8"]
+   s.require_paths = ["lib"]
+   s.rubygems_version = %q{1.3.5}
+   s.summary = %q{A library to help search across multiple craigslist locations.}
+   s.test_files = [
+     "test/crags/fetch_test.rb",
+     "test/crags/proxy_test.rb",
+     "test/crags/runner_test.rb",
+     "test/crags/searcher_test.rb",
+     "test/test_helper.rb"
+   ]
+
+   if s.respond_to? :specification_version then
+     current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+     s.specification_version = 3
+
+     if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+     else
+     end
+   else
+   end
+ end
data/lib/crags.rb ADDED
@@ -0,0 +1,41 @@
+ require 'curb'
+ require 'hpricot'
+ require 'erb'
+
+ module Crags
+   COUNTRIES = [
+     'jp',
+     'ar',
+     'bd',
+     'br',
+     'ca',
+     'cl',
+     'co',
+     'cr',
+     'cz',
+     'de',
+     'eg',
+     'hu',
+     'id',
+     'ie',
+     'il',
+     'lb',
+     'my',
+     'nl',
+     'nz',
+     'no',
+     'pk',
+     'pa',
+     'ru',
+     'th',
+     'ae',
+     'us',
+     've',
+     'vn'
+   ]
+ end
+
+ require 'crags/fetch'
+ require 'crags/proxy'
+ require 'crags/searcher'
+ require 'crags/runner'
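COUNTRIES is the set of two-letter ISO codes accepted as the country argument of Searcher#locations and #search; a trivial check, assuming nothing beyond the constant itself:

  require 'rubygems'
  require 'crags'

  # 'us' is the default country used by Searcher#search.
  Crags::COUNTRIES.include?('us') #=> true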
data/lib/crags/fetch.rb ADDED
@@ -0,0 +1,19 @@
+ module Crags
+   module Fetch
+     def fetch_doc(url)
+       Hpricot.parse(fetch_html(url))
+     end
+
+     def fetch_html(url)
+       req = fetch_request(url)
+       req.body_str
+     end
+
+     def fetch_request(url)
+       req = Curl::Easy.new(url)
+       req.follow_location = true
+       req.perform
+       req
+     end
+   end
+ end
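Fetch can be used on its own; a minimal sketch, assuming network access (the URL is just an example). Per the module above, fetch_doc returns an Hpricot document:

  require 'rubygems'
  require 'crags'

  include Crags::Fetch

  # fetch_request sets follow_location = true, so the parsed doc
  # reflects the final URL after any redirects.
  doc = fetch_doc('http://sfbay.craigslist.org/')
  puts doc.search('title').inner_text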
data/lib/crags/proxy.rb ADDED
@@ -0,0 +1,21 @@
+ module Crags
+   module Proxy
+     include Fetch
+
+     def lists
+       ["http://www.proxy4free.com/page1.html", "http://www.proxy4free.com/page3.html"]
+     end
+
+     def fetch_lists
+       lists.collect { |url| fetch_html(url) }
+     end
+
+     def scan(text)
+       text.scan(/\d+\.\d+\.\d+\.\d+/)
+     end
+
+     def proxies
+       fetch_lists.collect { |html| scan(html) }.flatten.uniq
+     end
+   end
+ end
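A sketch of Proxy, assuming the two proxy4free.com pages above still respond. Note that the regex in scan is deliberately loose: it matches any dotted quad, even invalid octets such as 78.900.42.32 (see the proxy test below):

  require 'rubygems'
  require 'crags'

  include Crags::Proxy

  # Fetches both list pages and returns the unique IP-like strings found.
  proxies.each { |ip| puts ip }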
data/lib/crags/runner.rb ADDED
@@ -0,0 +1,10 @@
+ module Crags
+   class Runner
+     include Searcher
+
+     def search_location(keyword, loc, category = 'sss')
+       puts "Searching #{loc}..."
+       super
+     end
+   end
+ end
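Runner is the class-based entry point: it logs each location and then defers to Searcher#search_location via super. A minimal sketch (the keyword is hypothetical):

  require 'rubygems'
  require 'crags'

  runner = Crags::Runner.new
  # Prints "Searching <site>..." per location while collecting results.
  results = runner.search('bicycle')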
data/lib/crags/searcher.rb ADDED
@@ -0,0 +1,77 @@
+ module Crags
+   module Searcher
+     include Fetch
+     include ERB::Util
+
+     def strip_http(url)
+       url.gsub(/^http\:\/\//, '').gsub(/\/$/, '')
+     end
+
+     def location_link(country)
+       "http://geo.craigslist.org/iso/#{country}"
+     end
+
+     def location_doc(country)
+       fetch_doc(location_link(country))
+     end
+
+     def location_request(country)
+       fetch_request(location_link(country))
+     end
+
+     def location_links(country)
+       location_doc(country).search("#list a")
+     end
+
+     def locations(country)
+       linkz = location_links(country)
+       if linkz.empty?
+         [strip_http(location_request(country).last_effective_url)]
+       else
+         linkz.collect { |link| strip_http(link["href"]) }
+       end
+     end
+
+     def categories
+       doc = fetch_doc("http://sfbay.craigslist.org/")
+       links = doc.search("table[@summary=\"for sale\"] a")
+       categories = {}
+       links.each do |link|
+         categories[link.inner_html] = link["href"]
+       end
+       categories
+     end
+
+     def search(keyword, country = 'us', category = 'sss', &block)
+       locations(country).collect do |loc|
+         sleep(1 + rand(3))
+         search_location(keyword, loc, category, &block)
+       end.flatten
+     end
+
+     def items(doc)
+       doc.search("item").collect do |item|
+         hashify(item)
+       end
+     end
+
+     def hashify(item)
+       title = item.at("title").inner_text
+       url = strip_http(item["rdf:about"])
+       date = DateTime.parse(item.at("dc:date").inner_text)
+       {:title => title, :url => url, :date => date}
+     end
+
+     def search_location_link(keyword, loc, category = 'sss')
+       "http://#{loc}/search/#{category}?query=#{url_encode(keyword)}"
+     end
+
+     def search_location(keyword, loc, category = 'sss', &block)
+       doc = fetch_doc("#{search_location_link(keyword, loc, category)}&format=rss")
+       items(doc).collect do |item|
+         yield item if block_given?
+         item
+       end
+     end
+   end
+ end
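When given a block, search yields each item as search_location finds it, so results can be streamed instead of waited on; the built-in sleep(1 + rand(3)) throttles requests between locations. A hedged sketch (hypothetical keyword):

  require 'rubygems'
  require 'crags'

  include Crags::Searcher

  # Stream matches from every US site as they arrive.
  search('bicycle', 'us', 'sss') do |item|
    puts "#{item[:title]} -> #{item[:url]}"
  end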
data/test/crags/fetch_test.rb ADDED
@@ -0,0 +1,33 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+
+ class Crags::FetchTest < Test::Unit::TestCase
+   context "Fetch" do
+     setup do
+       extend Crags::Fetch
+     end
+
+     should "fetch doc should hpricot fetched html" do
+       stubs(:fetch_html).with("url").returns("html")
+       Hpricot.expects(:parse).with("html").returns("doc")
+       fetch_doc("url").should == "doc"
+     end
+
+     should "fetch html should fetch_request a url" do
+       curb = stub(:body_str => "uhh")
+       expects(:fetch_request).with("url").returns(curb)
+       fetch_html("url").should == "uhh"
+     end
+
+     should "create a new request" do
+       req = stub(:follow_location= => nil, :perform => nil)
+       Curl::Easy.expects(:new).with("url").returns(req)
+       fetch_request("url").should == req
+     end
+
+     should "follow redirects for fetched requests" do
+       req = mock(:follow_location= => nil, :perform => nil)
+       Curl::Easy.stubs(:new).returns(req)
+       fetch_request("url")
+     end
+   end
+ end
data/test/crags/proxy_test.rb ADDED
@@ -0,0 +1,25 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+
+ class Crags::ProxyTest < Test::Unit::TestCase
+   context "Proxy" do
+     setup do
+       extend Crags::Proxy
+     end
+
+     should "lists should return a list of proxy list websites" do
+       lists.should == ["http://www.proxy4free.com/page1.html", "http://www.proxy4free.com/page3.html"]
+     end
+
+     should "fetch lists should fetch html for each site in lists" do
+       stubs(:lists).returns(["1", "2"])
+       expects(:fetch_html).with("1").returns("html_1")
+       expects(:fetch_html).with("2").returns("html_2")
+       fetch_lists.should == ["html_1", "html_2"]
+     end
+
+     should "scan should return all ips in a text block" do
+       text = "192.168.1.2 omg dude!! wtf.f.f.asdasd9.8.9 78.900.42.32"
+       scan(text).should == ["192.168.1.2", "78.900.42.32"]
+     end
+   end
+ end
data/test/crags/runner_test.rb ADDED
@@ -0,0 +1,28 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+
+ class Crags::RunnerTest < Test::Unit::TestCase
+   context "instance of Runner" do
+     setup do
+       @runner = Crags::Runner.new
+       @runner.stubs(:fetch_doc)
+       @runner.stubs(:items).returns([])
+     end
+
+     should "runner should include searcher" do
+       Crags::Runner.ancestors.include?(Crags::Searcher).should == true
+     end
+
+     should "search location should puts message with loc" do
+       @runner.expects(:puts).with { |val| val =~ /location/ }
+       @runner.search_location("", "location", "category")
+     end
+
+     should "search location should take a category" do
+       @runner.search_location("", "location", "category")
+     end
+
+     should "search location should have default category sss" do
+       @runner.search_location("", "location")
+     end
+   end
+ end
data/test/crags/searcher_test.rb ADDED
@@ -0,0 +1,139 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+
+ class Crags::SearcherTest < Test::Unit::TestCase
+
+   context "Searcher with stubbed fetch doc" do
+     setup do
+       extend Crags::Searcher
+       stubs(:sleep)
+       stubs(:fetch_doc)
+     end
+
+     should "strip_http should remove http:// and trailing /" do
+       url = "http://omg/"
+       strip_http(url).should == "omg"
+     end
+
+     should "strip_http should remove http:// when there is no trailing slash" do
+       url = "http://omg"
+       strip_http(url).should == "omg"
+     end
+
+     should "location doc should fetch doc at location url" do
+       expects(:fetch_doc).with("http://geo.craigslist.org/iso/us").returns("doc")
+       location_doc('us').should == "doc"
+     end
+
+     should "location links should get all a tags from div with id list" do
+       doc = mock { expects(:search).with("#list a").returns("links") }
+       stubs(:location_doc).returns(doc)
+       location_links('us').should == "links"
+     end
+
+     should "locations should return array of urls using a location link's href" do
+       links = []
+       2.times do |i|
+         links << mock { |m| m.expects(:[]).with("href").returns("http://url#{i}/") }
+       end
+       stubs(:location_links).returns(links)
+       locations('us').should == ["url0", "url1"]
+     end
+
+     should "locations should return array of one url using location_urls last_effective_url when no links are present on location_url page" do
+       stubs(:location_links).returns([])
+       req = mock(:last_effective_url => 'http://url.org/')
+       stubs(:location_request).with('us').returns(req)
+       locations('us').should == ["url.org"]
+     end
+
+     should "search should search location for each location with keyword and return list" do
+       locations = ["url0", "url1"]
+
+       locations.each do |loc|
+         expects(:search_location).with("omg", loc, 'sss').returns(["1#{loc}", "2#{loc}"])
+       end
+
+       stubs(:locations).returns(locations)
+       search("omg").should == ["1url0", "2url0", "1url1", "2url1"]
+     end
+
+     should "search should call sleep for each location" do
+       expects(:sleep).times(2)
+       stubs(:locations).returns([1, 2])
+       stubs(:search_location)
+       search("")
+     end
+
+     should "search location should fetch doc for search url" do
+       expects(:fetch_doc).with("http://url/search/sss?query=keyword&format=rss")
+       stubs(:items).returns([])
+       search_location("keyword", "url")
+     end
+
+     should "search location should return items" do
+       items = [1, 2, 3]
+       expects(:items).returns(items)
+       search_location("keyword", "url").should == items
+     end
+
+     should "items should get all item elements from doc" do
+       item = stub
+       stubs(:hashify).with(item).returns(1)
+       doc = mock { expects(:search).with("item").returns([item]) }
+       items(doc).should == [1]
+     end
+
+     should "items should hashify all item elements from doc" do
+       item = stub
+       expects(:hashify).with(item).returns(1)
+       doc = stub { stubs(:search).returns([item]) }
+       items(doc).should == [1]
+     end
+
+     should "categories should fetch doc the main sfbay page" do
+       doc = stub(:search => [])
+       expects(:fetch_doc).with("http://sfbay.craigslist.org/").returns(doc)
+       categories
+     end
+
+     should "categories should search for all links in the table with property summary equal to for sale" do
+       doc = mock { expects(:search).with("table[@summary=\"for sale\"] a").returns([]) }
+       stubs(:fetch_doc).returns(doc)
+       categories
+     end
+
+     should "categories should return a hash with link inner html keys and link href values" do
+       link = stub(:inner_html => "inner_html") do
+         stubs(:[]).with("href").returns("href")
+       end
+
+       doc = stub(:search => [link, link])
+       stubs(:fetch_doc).returns(doc)
+       categories.should == {'inner_html' => 'href'}
+     end
+
+     should "search location should accept a category parameter" do
+       expects(:fetch_doc).with("http://loc/search/scram?query=keyword&format=rss")
+       stubs(:items).returns([])
+       search_location('keyword', 'loc', 'scram')
+     end
+
+     should "search location default category is sss" do
+       expects(:fetch_doc).with("http://loc/search/sss?query=keyword&format=rss")
+       stubs(:items).returns([])
+       search_location('keyword', 'loc')
+     end
+
+     should "search should pass parameter to search location" do
+       stubs(:locations).returns([0])
+       expects(:search_location).with('keyword', 0, 'chum')
+       search('keyword', 'us', 'chum')
+     end
+
+     should "search should have default category of sss" do
+       stubs(:locations).returns([0])
+       expects(:search_location).with('keyword', 0, 'sss')
+       search('keyword')
+     end
+   end
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,5 @@
+ require 'rubygems'
+ require 'shoulda'
+ require 'matchy'
+ require 'mocha'
+ require 'crags'
metadata ADDED
@@ -0,0 +1,75 @@
+ --- !ruby/object:Gem::Specification
+ name: crags
+ version: !ruby/object:Gem::Version
+   version: 1.5.9
+ platform: ruby
+ authors:
+ - Justin Marney
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2009-10-13 00:00:00 -04:00
+ default_executable:
+ dependencies: []
+
+ description: A library to help search across multiple craigslist locations.
+ email: gotascii@gmail.com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files:
+ - LICENSE
+ - README.rdoc
+ files:
+ - .gitignore
+ - LICENSE
+ - README.rdoc
+ - Rakefile
+ - VERSION
+ - crags.gemspec
+ - lib/crags.rb
+ - lib/crags/fetch.rb
+ - lib/crags/proxy.rb
+ - lib/crags/runner.rb
+ - lib/crags/searcher.rb
+ - test/crags/fetch_test.rb
+ - test/crags/proxy_test.rb
+ - test/crags/runner_test.rb
+ - test/crags/searcher_test.rb
+ - test/test_helper.rb
+ has_rdoc: true
+ homepage: http://github.com/gotascii/crags
+ licenses: []
+
+ post_install_message:
+ rdoc_options:
+ - --charset=UTF-8
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ requirements: []
+
+ rubyforge_project:
+ rubygems_version: 1.3.5
+ signing_key:
+ specification_version: 3
+ summary: A library to help search across multiple craigslist locations.
+ test_files:
+ - test/crags/fetch_test.rb
+ - test/crags/proxy_test.rb
+ - test/crags/runner_test.rb
+ - test/crags/searcher_test.rb
+ - test/test_helper.rb