crags 1.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
+ pkg/
+ .DS_Store
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2009 Justin Marney
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,33 @@
+ = crags
+
+ A library to help search across multiple craigslist locations.
+
+ == Usage
+
+ Searches all available craigslist sites for a keyword in the "for sale" category:
+   search(keyword)
+
+ search also takes a category:
+   search(keyword, 'sss')
+
+ There are a bunch of helpful API methods; check the tests for more info.
+
+ == Requirements
+
+ * hpricot
+ * curb
+
+ == Note on Patches/Pull Requests
+
+ * Fork the project.
+ * Make your feature addition or bug fix.
+ * Add tests for it. This is important so I don't break it in a
+   future version unintentionally.
+ * Commit, but do not mess with the rakefile, version, or history.
+   (If you want to have your own version, that is fine, but bump the
+   version in a commit by itself so I can ignore it when I pull.)
+ * Send me a pull request. Bonus points for topic branches.
+
+ == Copyright
+
+ Copyright (c) 2009 Justin Marney. See LICENSE for details.
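For reference, here is a minimal usage sketch based on the README above and the modules added below. The keyword is illustrative; the mixin style and the search signature come from lib/crags/searcher.rb and the tests:

    require 'rubygems'
    require 'crags'

    # Mix the search API into the current object, as the tests do.
    include Crags::Searcher

    # Search every US craigslist site for "road bike" in the default
    # "for sale" category ('sss'); returns an array of item hashes.
    results = search('road bike')
    results.first  # => {:title => "...", :url => "...", :date => #<DateTime ...>}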
data/Rakefile ADDED
@@ -0,0 +1,55 @@
+ require 'rubygems'
+ require 'rake'
+
+ begin
+   require 'jeweler'
+   Jeweler::Tasks.new do |gem|
+     gem.name = "crags"
+     gem.summary = %Q{A library to help search across multiple craigslist locations.}
+     gem.description = %Q{A library to help search across multiple craigslist locations.}
+     gem.email = "gotascii@gmail.com"
+     gem.homepage = "http://github.com/gotascii/crags"
+     gem.authors = ["Justin Marney"]
+   end
+   Jeweler::GemcutterTasks.new
+ rescue LoadError
+   puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
+ end
+
+ require 'rake/testtask'
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.pattern = 'test/**/*_test.rb'
+   test.verbose = true
+ end
+
+ begin
+   require 'rcov/rcovtask'
+   Rcov::RcovTask.new do |test|
+     test.libs << 'test'
+     test.pattern = 'test/**/*_test.rb'
+     test.verbose = true
+   end
+ rescue LoadError
+   task :rcov do
+     abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+   end
+ end
+
+ task :test => :check_dependencies
+
+ task :default => :test
+
+ require 'rake/rdoctask'
+ Rake::RDocTask.new do |rdoc|
+   if File.exist?('VERSION')
+     version = File.read('VERSION')
+   else
+     version = ""
+   end
+
+   rdoc.rdoc_dir = 'rdoc'
+   rdoc.title = "crags #{version}"
+   rdoc.rdoc_files.include('README*')
+   rdoc.rdoc_files.include('lib/**/*.rb')
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
+ 1.5.9
data/crags.gemspec ADDED
@@ -0,0 +1,59 @@
+ # Generated by jeweler
+ # DO NOT EDIT THIS FILE
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
+ # -*- encoding: utf-8 -*-
+
+ Gem::Specification.new do |s|
+   s.name = %q{crags}
+   s.version = "1.5.9"
+
+   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+   s.authors = ["Justin Marney"]
+   s.date = %q{2009-10-13}
+   s.description = %q{A library to help search across multiple craigslist locations.}
+   s.email = %q{gotascii@gmail.com}
+   s.extra_rdoc_files = [
+     "LICENSE",
+     "README.rdoc"
+   ]
+   s.files = [
+     ".gitignore",
+     "LICENSE",
+     "README.rdoc",
+     "Rakefile",
+     "VERSION",
+     "crags.gemspec",
+     "lib/crags.rb",
+     "lib/crags/fetch.rb",
+     "lib/crags/proxy.rb",
+     "lib/crags/runner.rb",
+     "lib/crags/searcher.rb",
+     "test/crags/fetch_test.rb",
+     "test/crags/proxy_test.rb",
+     "test/crags/runner_test.rb",
+     "test/crags/searcher_test.rb",
+     "test/test_helper.rb"
+   ]
+   s.homepage = %q{http://github.com/gotascii/crags}
+   s.rdoc_options = ["--charset=UTF-8"]
+   s.require_paths = ["lib"]
+   s.rubygems_version = %q{1.3.5}
+   s.summary = %q{A library to help search across multiple craigslist locations.}
+   s.test_files = [
+     "test/crags/fetch_test.rb",
+     "test/crags/proxy_test.rb",
+     "test/crags/runner_test.rb",
+     "test/crags/searcher_test.rb",
+     "test/test_helper.rb"
+   ]
+
+   if s.respond_to? :specification_version then
+     current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+     s.specification_version = 3
+
+     if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+     else
+     end
+   else
+   end
+ end
data/lib/crags.rb ADDED
@@ -0,0 +1,41 @@
+ require 'curb'
+ require 'hpricot'
+ require 'erb'
+
+ module Crags
+   COUNTRIES = [
+     'jp',
+     'ar',
+     'bd',
+     'br',
+     'ca',
+     'cl',
+     'co',
+     'cr',
+     'cz',
+     'de',
+     'eg',
+     'hu',
+     'id',
+     'ie',
+     'il',
+     'lb',
+     'my',
+     'nl',
+     'nz',
+     'no',
+     'pk',
+     'pa',
+     'ru',
+     'th',
+     'ae',
+     'us',
+     've',
+     'vn'
+   ]
+ end
+
+ require 'crags/fetch'
+ require 'crags/proxy'
+ require 'crags/searcher'
+ require 'crags/runner'
data/lib/crags/fetch.rb ADDED
@@ -0,0 +1,19 @@
+ module Crags
+   module Fetch
+     def fetch_doc(url)
+       Hpricot.parse(fetch_html(url))
+     end
+
+     def fetch_html(url)
+       req = fetch_request(url)
+       req.body_str
+     end
+
+     def fetch_request(url)
+       req = Curl::Easy.new(url)
+       req.follow_location = true
+       req.perform
+       req
+     end
+   end
+ end
data/lib/crags/proxy.rb ADDED
@@ -0,0 +1,21 @@
+ module Crags
+   module Proxy
+     include Fetch
+
+     def lists
+       ["http://www.proxy4free.com/page1.html", "http://www.proxy4free.com/page3.html"]
+     end
+
+     def fetch_lists
+       lists.collect { |url| fetch_html(url) }
+     end
+
+     def scan(text)
+       text.scan(/\d+\.\d+\.\d+\.\d+/)
+     end
+
+     def proxies
+       fetch_lists.collect { |html| scan(html) }.flatten.uniq
+     end
+   end
+ end
data/lib/crags/runner.rb ADDED
@@ -0,0 +1,10 @@
+ module Crags
+   class Runner
+     include Searcher
+
+     def search_location(keyword, loc, category = 'sss')
+       puts "Searching #{loc}..."
+       super
+     end
+   end
+ end
data/lib/crags/searcher.rb ADDED
@@ -0,0 +1,77 @@
+ module Crags
+   module Searcher
+     include Fetch
+     include ERB::Util
+
+     def strip_http(url)
+       url.gsub(/^http\:\/\//, '').gsub(/\/$/, '')
+     end
+
+     def location_link(country)
+       "http://geo.craigslist.org/iso/#{country}"
+     end
+
+     def location_doc(country)
+       fetch_doc(location_link(country))
+     end
+
+     def location_request(country)
+       fetch_request(location_link(country))
+     end
+
+     def location_links(country)
+       location_doc(country).search("#list a")
+     end
+
+     def locations(country)
+       linkz = location_links(country)
+       if linkz.empty?
+         [strip_http(location_request(country).last_effective_url)]
+       else
+         linkz.collect { |link| strip_http(link["href"]) }
+       end
+     end
+
+     def categories
+       doc = fetch_doc("http://sfbay.craigslist.org/")
+       links = doc.search("table[@summary=\"for sale\"] a")
+       categories = {}
+       links.each do |link|
+         categories[link.inner_html] = link["href"]
+       end
+       categories
+     end
+
+     def search(keyword, country = 'us', category = 'sss', &block)
+       locations(country).collect do |loc|
+         sleep(1 + rand(3))
+         search_location(keyword, loc, category, &block)
+       end.flatten
+     end
+
+     def items(doc)
+       doc.search("item").collect do |item|
+         hashify(item)
+       end
+     end
+
+     def hashify(item)
+       title = item.at("title").inner_text
+       url = strip_http(item["rdf:about"])
+       date = DateTime.parse(item.at("dc:date").inner_text)
+       {:title => title, :url => url, :date => date}
+     end
+
+     def search_location_link(keyword, loc, category = 'sss')
+       "http://#{loc}/search/#{category}?query=#{url_encode(keyword)}"
+     end
+
+     def search_location(keyword, loc, category = 'sss', &block)
+       doc = fetch_doc("#{search_location_link(keyword, loc, category)}&format=rss")
+       items(doc).collect do |item|
+         yield item if block_given?
+         item
+       end
+     end
+   end
+ end
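Because search forwards its block down to search_location, items can be handled as they arrive instead of waiting for every location to finish; each yielded item is the {:title, :url, :date} hash built by hashify from the RSS feed. A sketch, with an illustrative keyword:

    extend Crags::Searcher
    search('camera', 'us', 'sss') do |item|
      # Streamed per-item handling; the full flattened array is still returned.
      puts "#{item[:date]} #{item[:title]} -- #{item[:url]}"
    end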
data/test/crags/fetch_test.rb ADDED
@@ -0,0 +1,33 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+
+ class Crags::FetchTest < Test::Unit::TestCase
+   context "Fetch" do
+     setup do
+       extend Crags::Fetch
+     end
+
+     should "fetch doc should hpricot fetched html" do
+       stubs(:fetch_html).with("url").returns("html")
+       Hpricot.expects(:parse).with("html").returns("doc")
+       fetch_doc("url").should == "doc"
+     end
+
+     should "fetch html should fetch_request a url" do
+       curb = stub(:body_str => "uhh")
+       expects(:fetch_request).with("url").returns(curb)
+       fetch_html("url").should == "uhh"
+     end
+
+     should "create a new request" do
+       req = stub(:follow_location= => nil, :perform => nil)
+       Curl::Easy.expects(:new).with("url").returns(req)
+       fetch_request("url").should == req
+     end
+
+     should "follow redirects for fetched requests" do
+       req = mock(:follow_location= => nil, :perform => nil)
+       Curl::Easy.stubs(:new).returns(req)
+       fetch_request("url")
+     end
+   end
+ end
data/test/crags/proxy_test.rb ADDED
@@ -0,0 +1,25 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+
+ class Crags::ProxyTest < Test::Unit::TestCase
+   context "Proxy" do
+     setup do
+       extend Crags::Proxy
+     end
+
+     should "lists should return a list of proxy list websites" do
+       lists.should == ["http://www.proxy4free.com/page1.html", "http://www.proxy4free.com/page3.html"]
+     end
+
+     should "fetch lists should fetch html for each site in lists" do
+       stubs(:lists).returns(["1", "2"])
+       expects(:fetch_html).with("1").returns("html_1")
+       expects(:fetch_html).with("2").returns("html_2")
+       fetch_lists.should == ["html_1", "html_2"]
+     end
+
+     should "scan should return all ips in a text blizoc" do
+       text = "192.168.1.2 omg dude!! wtf.f.f.asdasd9.8.9 78.900.42.32"
+       scan(text).should == ["192.168.1.2", "78.900.42.32"]
+     end
+   end
+ end
data/test/crags/runner_test.rb ADDED
@@ -0,0 +1,28 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+
+ class Crags::RunnerTest < Test::Unit::TestCase
+   context "instance of Runner" do
+     setup do
+       @runner = Crags::Runner.new
+       @runner.stubs(:fetch_doc)
+       @runner.stubs(:items).returns([])
+     end
+
+     should "runner should include searcher" do
+       Crags::Runner.ancestors.include?(Crags::Searcher).should == true
+     end
+
+     should "search location should puts message with loc" do
+       @runner.expects(:puts).with { |val| val =~ /location/ }
+       @runner.search_location("", "location", "category")
+     end
+
+     should "search location should take a category" do
+       @runner.search_location("", "location", "category")
+     end
+
+     should "search location should have default category sss" do
+       @runner.search_location("", "location")
+     end
+   end
+ end
data/test/crags/searcher_test.rb ADDED
@@ -0,0 +1,139 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+
+ class Crags::SearcherTest < Test::Unit::TestCase
+
+   context "Searcher with stubbed fetch doc" do
+     setup do
+       extend Crags::Searcher
+       stubs(:sleep)
+       stubs(:fetch_doc)
+     end
+
+     should "strip_http should remove http:// and trailing /" do
+       url = "http://omg/"
+       strip_http(url).should == "omg"
+     end
+
+     should "strip_http should remove http:// when there is no trailing slash" do
+       url = "http://omg"
+       strip_http(url).should == "omg"
+     end
+
+     should "location doc should fetch doc at location url" do
+       expects(:fetch_doc).with("http://geo.craigslist.org/iso/us").returns("doc")
+       location_doc('us').should == "doc"
+     end
+
+     should "location links should get all a tags from div with id list" do
+       doc = mock { expects(:search).with("#list a").returns("links") }
+       stubs(:location_doc).returns(doc)
+       location_links('us').should == "links"
+     end
+
+     should "locations should return array of urls using a location link's href" do
+       links = []
+       2.times do |i|
+         links << mock { |m| m.expects(:[]).with("href").returns("http://url#{i}/") }
+       end
+       stubs(:location_links).returns(links)
+       locations('us').should == ["url0", "url1"]
+     end
+
+     should "locations should return array of one url using location_urls last_effective_url when no links are present on location_url page" do
+       stubs(:location_links).returns([])
+       req = mock(:last_effective_url => 'http://url.org/')
+       stubs(:location_request).with('us').returns(req)
+       locations('us').should == ["url.org"]
+     end
+
+     should "search should search location for each location with keyword and return list" do
+       locations = ["url0", "url1"]
+
+       locations.each do |loc|
+         expects(:search_location).with("omg", loc, 'sss').returns(["1#{loc}", "2#{loc}"])
+       end
+
+       stubs(:locations).returns(locations)
+       search("omg").should == ["1url0", "2url0", "1url1", "2url1"]
+     end
+
+     should "search should call sleep for each location" do
+       expects(:sleep).times(2)
+       stubs(:locations).returns([1, 2])
+       stubs(:search_location)
+       search("")
+     end
+
+     should "search location should fetch doc for search url" do
+       expects(:fetch_doc).with("http://url/search/sss?query=keyword&format=rss")
+       stubs(:items).returns([])
+       search_location("keyword", "url")
+     end
+
+     should "search location should create return items" do
+       items = [1, 2, 3]
+       expects(:items).returns(items)
+       search_location("keyword", "url").should == items
+     end
+
+     should "items should get all item elements from doc" do
+       item = stub
+       stubs(:hashify).with(item).returns(1)
+       doc = mock { expects(:search).with("item").returns([item]) }
+       items(doc).should == [1]
+     end
+
+     should "items should hashify all item elements from doc" do
+       item = stub
+       expects(:hashify).with(item).returns(1)
+       doc = stub { stubs(:search).returns([item]) }
+       items(doc).should == [1]
+     end
+
+     should "categories should fetch doc the main sfbay page" do
+       doc = stub(:search => [])
+       expects(:fetch_doc).with("http://sfbay.craigslist.org/").returns(doc)
+       categories
+     end
+
+     should "categories should search for all links in the table with property summary equal to for sale" do
+       doc = mock { expects(:search).with("table[@summary=\"for sale\"] a").returns([]) }
+       stubs(:fetch_doc).returns(doc)
+       categories
+     end
+
+     should "categories should return a hash with link inner html keys and link href values" do
+       link = stub(:inner_html => "inner_html") do
+         stubs(:[]).with("href").returns("href")
+       end
+
+       doc = stub(:search => [link, link])
+       stubs(:fetch_doc).returns(doc)
+       categories.should == {'inner_html' => 'href', 'inner_html' => 'href'}
+     end
+
+     should "search location should accept a category parameter" do
+       expects(:fetch_doc).with("http://loc/search/scram?query=keyword&format=rss")
+       stubs(:items).returns([])
+       search_location('keyword', 'loc', 'scram')
+     end
+
+     should "search location default category is sss" do
+       expects(:fetch_doc).with("http://loc/search/sss?query=keyword&format=rss")
+       stubs(:items).returns([])
+       search_location('keyword', 'loc')
+     end
+
+     should "search should pass parameter to search location" do
+       stubs(:locations).returns([0])
+       expects(:search_location).with('keyword', 0, 'chum')
+       search('keyword', 'us', 'chum')
+     end
+
+     should "search should have default category of sss" do
+       stubs(:locations).returns([0])
+       expects(:search_location).with('keyword', 0, 'sss')
+       search('keyword')
+     end
+   end
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,5 @@
+ require 'rubygems'
+ require 'shoulda'
+ require 'matchy'
+ require 'mocha'
+ require 'crags'
metadata ADDED
@@ -0,0 +1,75 @@
+ --- !ruby/object:Gem::Specification
+ name: crags
+ version: !ruby/object:Gem::Version
+   version: 1.5.9
+ platform: ruby
+ authors:
+ - Justin Marney
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2009-10-13 00:00:00 -04:00
+ default_executable:
+ dependencies: []
+
+ description: A library to help search across multiple craigslist locations.
+ email: gotascii@gmail.com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files:
+ - LICENSE
+ - README.rdoc
+ files:
+ - .gitignore
+ - LICENSE
+ - README.rdoc
+ - Rakefile
+ - VERSION
+ - crags.gemspec
+ - lib/crags.rb
+ - lib/crags/fetch.rb
+ - lib/crags/proxy.rb
+ - lib/crags/runner.rb
+ - lib/crags/searcher.rb
+ - test/crags/fetch_test.rb
+ - test/crags/proxy_test.rb
+ - test/crags/runner_test.rb
+ - test/crags/searcher_test.rb
+ - test/test_helper.rb
+ has_rdoc: true
+ homepage: http://github.com/gotascii/crags
+ licenses: []
+
+ post_install_message:
+ rdoc_options:
+ - --charset=UTF-8
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ requirements: []
+
+ rubyforge_project:
+ rubygems_version: 1.3.5
+ signing_key:
+ specification_version: 3
+ summary: A library to help search across multiple craigslist locations.
+ test_files:
+ - test/crags/fetch_test.rb
+ - test/crags/proxy_test.rb
+ - test/crags/runner_test.rb
+ - test/crags/searcher_test.rb
+ - test/test_helper.rb