gotascii-crags 1.0.2

data/.gitignore ADDED
@@ -0,0 +1 @@
+ pkg/
data/History.txt ADDED
File without changes
data/Manifest.txt ADDED
@@ -0,0 +1,17 @@
+ .gitignore
+ History.txt
+ Manifest.txt
+ README.txt
+ Rakefile
+ crags.gemspec
+ lib/crags.rb
+ lib/crags/fetch.rb
+ lib/crags/proxy.rb
+ lib/crags/runner.rb
+ lib/crags/searcher.rb
+ lib/js/client.html
+ test/crags/fetch_test.rb
+ test/crags/proxy_test.rb
+ test/crags/runner_test.rb
+ test/crags/searcher_test.rb
+ test/test_helper.rb
data/README.txt ADDED
@@ -0,0 +1,56 @@
+ crags
+ by Justin Marney
+ http://github.com/gotascii/crags
+
+ == DESCRIPTION:
+
+ A library to help search across multiple craigslist locations.
+
+ == FEATURES/PROBLEMS:
+
+ Allows you to search across multiple craigslist locations. Automatically
+ delays consecutive searches so craigslist doesn't ban your IP.
+
+ == SYNOPSIS:
+
+ # Searches all available craigslist sites for a keyword in "for sale".
+ search(keyword)
+
+ # search also takes a category.
+ search(keyword, 'sss')
+
+ There are a number of helpful API methods; check the tests for more info.
+
+ == REQUIREMENTS:
+
+ hpricot
+ curb
+
+ == INSTALL:
+
+ sudo gem install gotascii-crags -s http://gems.github.com
+
+ == LICENSE:
+
+ (The MIT License)
+
+ Copyright (c) 2008 FIXME (different license?)
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ 'Software'), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
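A quick end-to-end sketch of the API documented above (not part of the diff; Crags::Runner, the block form, and the 'sss' category all come from the sources below, while the keyword is just an illustration):

  require 'rubygems'
  require 'crags'

  # Crags::Runner mixes in Crags::Searcher and prints progress per location.
  runner = Crags::Runner.new
  links = runner.search('bike', 'sss') do |link|
    puts link  # each result is an "<a href=...>title</a>" string
  end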
data/Rakefile ADDED
@@ -0,0 +1,20 @@
+ begin
+   require 'bones'
+   Bones.setup
+ rescue LoadError
+   load 'tasks/setup.rb'
+ end
+
+ ensure_in_path 'lib'
+ require 'crags'
+
+ task :default => 'test'
+
+ PROJ.name = 'crags'
+ PROJ.authors = 'Justin Marney'
+ PROJ.email = 'justin.marney@viget.com'
+ PROJ.url = 'http://github.com/vigetlabs/crags'
+ PROJ.version = Crags::VERSION
+ PROJ.rubyforge.name = 'crags'
+ PROJ.test.files = FileList['test/**/*_test.rb']
+ PROJ.spec.opts << '--color'
data/crags.gemspec ADDED
@@ -0,0 +1,33 @@
+ Gem::Specification.new do |s|
+   s.name = %q{crags}
+   s.version = "1.0.2"
+
+   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+   s.authors = ["Justin Marney"]
+   s.date = %q{2009-01-30}
+   s.description = %q{A library to help search across multiple craigslist locations.}
+   s.email = %q{gotascii@gmail.com}
+   s.extra_rdoc_files = ["History.txt", "README.txt", "lib/js/client.html"]
+   s.files = [".gitignore", "History.txt", "Manifest.txt", "README.txt", "Rakefile", "crags.gemspec", "lib/crags.rb", "lib/crags/fetch.rb", "lib/crags/proxy.rb", "lib/crags/runner.rb", "lib/crags/searcher.rb", "lib/js/client.html", "tasks/ann.rake", "tasks/bones.rake", "tasks/gem.rake", "tasks/git.rake", "tasks/manifest.rake", "tasks/notes.rake", "tasks/post_load.rake", "tasks/rdoc.rake", "tasks/rubyforge.rake", "tasks/setup.rb", "tasks/spec.rake", "tasks/svn.rake", "tasks/test.rake", "test/crags/fetch_test.rb", "test/crags/proxy_test.rb", "test/crags/runner_test.rb", "test/crags/searcher_test.rb", "test/test_helper.rb"]
+   s.has_rdoc = true
+   s.homepage = %q{http://github.com/gotascii/crags}
+   s.rdoc_options = ["--main", "README.txt"]
+   s.require_paths = ["lib"]
+   s.rubyforge_project = %q{crags}
+   s.rubygems_version = %q{1.3.1}
+   s.summary = %q{A library to help search across multiple craigslist locations}
+   s.test_files = ["test/crags/fetch_test.rb", "test/crags/proxy_test.rb", "test/crags/runner_test.rb", "test/crags/searcher_test.rb"]
+
+   if s.respond_to? :specification_version then
+     current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+     s.specification_version = 2
+
+     if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+       s.add_development_dependency(%q<bones>, [">= 2.1.1"])
+     else
+       s.add_dependency(%q<bones>, [">= 2.1.1"])
+     end
+   else
+     s.add_dependency(%q<bones>, [">= 2.1.1"])
+   end
+ end
data/lib/crags/fetch.rb ADDED
@@ -0,0 +1,11 @@
+ module Crags
+   module Fetch
+     def fetch_doc(url)
+       Hpricot.parse(fetch_html(url))
+     end
+
+     def fetch_html(url)
+       Curl::Easy.perform(url).body_str
+     end
+   end
+ end
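For reference, a minimal sketch of using Fetch on its own (the Page class and URL are only illustrations; the module itself comes straight from the file above):

  require 'crags'

  class Page
    include Crags::Fetch
  end

  # fetch_html performs the HTTP GET via curb; fetch_doc wraps it in Hpricot.
  doc = Page.new.fetch_doc('http://sfbay.craigslist.org/')
  puts doc.search('title').inner_html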
data/lib/crags/proxy.rb ADDED
@@ -0,0 +1,21 @@
+ module Crags
+   module Proxy
+     include Fetch
+
+     def lists
+       ["http://www.proxy4free.com/page1.html", "http://www.proxy4free.com/page3.html"]
+     end
+
+     def fetch_lists
+       lists.collect { |url| fetch_html(url) }
+     end
+
+     def scan(text)
+       text.scan(/\d+\.\d+\.\d+\.\d+/)
+     end
+
+     def proxies
+       fetch_lists.collect { |html| scan(html) }.flatten.uniq
+     end
+   end
+ end
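A sketch of how Proxy might be driven (the ProxyScraper class is hypothetical and the returned addresses are illustrative only, since the proxy4free pages change constantly):

  require 'crags'

  class ProxyScraper
    include Crags::Proxy  # Proxy already pulls in Fetch
  end

  # Downloads both list pages and returns unique dotted-quad strings.
  ProxyScraper.new.proxies  # => e.g. ["66.98.238.8", "24.105.23.11"]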
data/lib/crags/runner.rb ADDED
@@ -0,0 +1,10 @@
+ module Crags
+   class Runner
+     include Searcher
+
+     def search_location(keyword, loc, category = 'sss')
+       puts "Searching #{loc}..."
+       super
+     end
+   end
+ end
data/lib/crags/searcher.rb ADDED
@@ -0,0 +1,53 @@
+ module Crags
+   module Searcher
+     include Fetch
+
+     def location_doc
+       fetch_doc("http://geo.craigslist.org/iso/us")
+     end
+
+     def location_links
+       location_doc.search("#list a")
+     end
+
+     def locations
+       location_links.collect { |link| link["href"] }
+     end
+
+     def categories
+       doc = fetch_doc("http://sfbay.craigslist.org/")
+       links = doc.search("table[@summary=\"for sale\"] a")
+       categories = {}
+       links.each do |link|
+         categories[link.inner_html] = link["href"]
+       end
+       categories
+     end
+
+     def search(keyword, category = 'sss', &block)
+       locations.collect do |loc|
+         sleep(1 + rand(3))
+         search_location(keyword, loc, category, &block)
+       end.flatten
+     end
+
+     def items(doc)
+       doc.search("item")
+     end
+
+     def search_location(keyword, loc, category = 'sss', &block)
+       doc = fetch_doc("#{loc}search/#{category}?query=#{keyword}&format=rss")
+       items(doc).collect do |item|
+         link = create_link(item)
+         yield link if block_given?
+         link
+       end
+     end
+
+     def create_link(item)
+       link = item["rdf:about"]
+       title = item.at("title").inner_text
+       "<a href=\"#{link}\">#{title}</a>"
+     end
+   end
+ end
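To make the request format concrete: the location hrefs scraped from geo.craigslist.org end in a trailing slash, so with loc = "http://washingtondc.craigslist.org/", the default category, and an illustrative keyword "bike", search_location fetches

  http://washingtondc.craigslist.org/search/sss?query=bike&format=rss

and maps each RSS <item> to an anchor string via create_link. Note the keyword is interpolated raw into the URL; escaping multi-word queries is left to the caller.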
data/lib/crags.rb ADDED
@@ -0,0 +1,26 @@
+ require 'rubygems'
+ require 'curb'
+ require 'hpricot'
+
+ module Crags
+   VERSION = '1.0.2'
+   LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
+   PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
+
+   def self.version
+     VERSION
+   end
+
+   def self.libpath( *args )
+     args.empty? ? LIBPATH : ::File.join(LIBPATH, *args)
+   end
+
+   def self.path( *args )
+     args.empty? ? PATH : ::File.join(PATH, *args)
+   end
+ end
+
+ require 'crags/fetch'
+ require 'crags/proxy'
+ require 'crags/searcher'
+ require 'crags/runner'
data/lib/js/client.html ADDED
@@ -0,0 +1,81 @@
+ <html>
+ <head>
+ <script language="javascript">
+ var IFrameObj; // our IFrame object
+ var IFrameDoc;
+ function callToServer() {
+   if (!document.createElement) { return true; }
+   var URL = 'http://washingtondc.craigslist.org/bik/index.rss';
+   if (!IFrameObj && document.createElement) {
+     // create the IFrame and assign a reference to the
+     // object to our global variable IFrameObj.
+     // this will only happen the first time
+     // callToServer() is called
+     try {
+       var tempIFrame = document.createElement('iframe');
+       tempIFrame.setAttribute('id', 'RSIFrame');
+       tempIFrame.style.border = '0px';
+       tempIFrame.style.width = '0px';
+       tempIFrame.style.height = '0px';
+       IFrameObj = document.body.appendChild(tempIFrame);
+
+       if (document.frames) {
+         // this is for IE5 Mac, because it will only
+         // allow access to the document object
+         // of the IFrame if we access it through
+         // the document.frames array
+         IFrameObj = document.frames['RSIFrame'];
+       }
+     } catch(exception) {
+       // This is for IE5 PC, which does not allow dynamic creation
+       // and manipulation of an iframe object. Instead, we'll fake
+       // it up by creating our own objects.
+       iframeHTML = '\<iframe id="RSIFrame" style="';
+       iframeHTML += 'border:0px;';
+       iframeHTML += 'width:0px;';
+       iframeHTML += 'height:0px;';
+       iframeHTML += '"><\/iframe>';
+       document.body.innerHTML += iframeHTML;
+       IFrameObj = new Object();
+       IFrameObj.document = new Object();
+       IFrameObj.document.location = new Object();
+       IFrameObj.document.location.iframe = document.getElementById('RSIFrame');
+       IFrameObj.document.location.replace = function(location) {
+         this.iframe.src = location;
+       }
+     }
+   }
+
+   if (navigator.userAgent.indexOf('Gecko') != -1 && !IFrameObj.contentDocument) {
+     // we have to give NS6 a fraction of a second
+     // to recognize the new IFrame
+     setTimeout('callToServer()', 10);
+     return false;
+   }
+
+   if (IFrameObj.contentDocument) {
+     // For NS6
+     IFrameDoc = IFrameObj.contentDocument;
+   } else if (IFrameObj.contentWindow) {
+     // For IE5.5 and IE6
+     IFrameDoc = IFrameObj.contentWindow.document;
+   } else if (IFrameObj.document) {
+     // For IE5
+     IFrameDoc = IFrameObj.document;
+   } else {
+     return true;
+   }
+
+   IFrameDoc.location.replace(URL);
+   return false;
+ }
+
+ </script>
+ </head>
+ <body>
+ <script>
+ callToServer();
+ alert(IFrameDoc.innerHTML);
+ </script>
+ </body>
+ </html>
data/test/crags/fetch_test.rb ADDED
@@ -0,0 +1,20 @@
+ require '../test_helper'
+
+ context "Fetch" do
+   setup do
+     extend Crags::Fetch
+   end
+
+   specify "fetch doc should hpricot fetched html" do
+     stubs(:fetch_html).with("url").returns("html")
+     Hpricot.expects(:parse).with("html").returns("doc")
+     fetch_doc("url").should == "doc"
+   end
+
+   specify "fetch html should curl a url" do
+     curb = stub(:body_str => "uhh")
+     Curl::Easy.expects(:perform).with("url").returns(curb)
+     fetch_html("url").should == "uhh"
+   end
+ end
+
data/test/crags/proxy_test.rb ADDED
@@ -0,0 +1,24 @@
+ require '../test_helper'
+
+ context "Proxy" do
+   setup do
+     extend Crags::Proxy
+   end
+
+   specify "lists should return a list of proxy list websites" do
+     lists.should == ["http://www.proxy4free.com/page1.html", "http://www.proxy4free.com/page3.html"]
+   end
+
+   specify "fetch lists should fetch html for each site in lists" do
+     stubs(:lists).returns(["1", "2"])
+     expects(:fetch_html).with("1").returns("html_1")
+     expects(:fetch_html).with("2").returns("html_2")
+     fetch_lists.should == ["html_1", "html_2"]
+   end
+
+   specify "scan should return all ips in a text blizoc" do
+     text = "192.168.1.2 omg dude!! wtf.f.f.asdasd9.8.9 78.900.42.32"
+     scan(text).should == ["192.168.1.2", "78.900.42.32"]
+   end
+ end
+
data/test/crags/runner_test.rb ADDED
@@ -0,0 +1,26 @@
+ require '../test_helper'
+
+ context "Runner" do
+   setup do
+     @runner = Crags::Runner.new
+     @runner.stubs(:fetch_doc)
+     @runner.stubs(:items).returns([])
+   end
+
+   specify "runner should include searcher" do
+     Crags::Runner.ancestors.should.include Crags::Searcher
+   end
+
+   specify "search location should puts message with loc" do
+     @runner.expects(:puts).with { |val| val =~ /location/ }
+     @runner.search_location("", "location", "category")
+   end
+
+   specify "search location should take a category" do
+     @runner.search_location("", "location", "category")
+   end
+
+   specify "search location should have default category sss" do
+     @runner.search_location("", "location")
+   end
+ end
data/test/crags/searcher_test.rb ADDED
@@ -0,0 +1,123 @@
+ require '../test_helper'
+
+ context "Searcher with stubbed fetch doc" do
+   setup do
+     extend Crags::Searcher
+     stubs(:sleep)
+     stubs(:fetch_doc)
+   end
+
+   specify "location doc should fetch doc at location url" do
+     expects(:fetch_doc).with("http://geo.craigslist.org/iso/us").returns("doc")
+     location_doc.should == "doc"
+   end
+
+   specify "location links should get all a tags from div with id list" do
+     doc = mock { expects(:search).with("#list a").returns("links") }
+     stubs(:location_doc).returns(doc)
+     location_links.should == "links"
+   end
+
+   specify "locations should return array of urls using a location link's href" do
+     links = []
+     2.times do |i|
+       links << mock { |m| m.expects(:[]).with("href").returns("url#{i}") }
+     end
+     stubs(:location_links).returns(links)
+     locations.should == ["url0", "url1"]
+   end
+
+   specify "search should search location for each location with keyword and return list" do
+     locations = ["url0", "url1"]
+
+     locations.each do |loc|
+       expects(:search_location).with("omg", loc, 'sss').returns(["1#{loc}", "2#{loc}"])
+     end
+
+     stubs(:locations).returns(locations)
+     search("omg").should == ["1url0", "2url0", "1url1", "2url1"]
+   end
+
+   specify "search should call sleep for each location" do
+     expects(:sleep).times(2)
+     stubs(:locations).returns([1, 2])
+     stubs(:search_location)
+     search("")
+   end
+
+   specify "search location should fetch doc for search url" do
+     expects(:fetch_doc).with("urlsearch/sss?query=keyword&format=rss")
+     stubs(:items).returns([])
+     search_location("keyword", "url")
+   end
+
+   specify "search location should create link with each item in doc items and return list" do
+     items = [1, 2, 3]
+     expects(:items).returns(items)
+     items.each do |i|
+       expects(:create_link).with(i).returns("omg#{i}")
+     end
+     search_location("keyword", "url").should == ['omg1', 'omg2', 'omg3']
+   end
+
+   specify "create link should return an a href based on item element" do
+     inner_text = mock(:inner_text => "text")
+     item = mock do |l|
+       expects(:[]).with("rdf:about").returns("link")
+       expects(:at).with("title").returns(inner_text)
+     end
+
+     create_link(item).should == "<a href=\"link\">text</a>"
+   end
+
+   specify "items should get all item elements from doc" do
+     doc = mock { expects(:search).with("item").returns(1) }
+     items(doc).should == 1
+   end
+
+   specify "categories should fetch doc the main sfbay page" do
+     doc = stub(:search => [])
+     expects(:fetch_doc).with("http://sfbay.craigslist.org/").returns(doc)
+     categories
+   end
+
+   specify "categories should search for all links in the table with property summary equal to for sale" do
+     doc = mock { expects(:search).with("table[@summary=\"for sale\"] a").returns([]) }
+     stubs(:fetch_doc).returns(doc)
+     categories
+   end
+
+   specify "categories should return a hash with link inner html keys and link href values" do
+     link = stub(:inner_html => "inner_html") do
+       stubs(:[]).with("href").returns("href")
+     end
+
+     doc = stub(:search => [link, link])
+     stubs(:fetch_doc).returns(doc)
+     categories.should == { 'inner_html' => 'href' }
+   end
+
+   specify "search location should accept a category parameter" do
+     expects(:fetch_doc).with("locsearch/scram?query=keyword&format=rss")
+     stubs(:items).returns([])
+     search_location('keyword', 'loc', 'scram')
+   end
+
+   specify "search location default category is sss" do
+     expects(:fetch_doc).with("locsearch/sss?query=keyword&format=rss")
+     stubs(:items).returns([])
+     search_location('keyword', 'loc')
+   end
+
+   specify "search should pass parameter to search location" do
+     stubs(:locations).returns([0])
+     expects(:search_location).with('keyword', 0, 'chum')
+     search('keyword', 'chum')
+   end
+
+   specify "search should have default category of sss" do
+     stubs(:locations).returns([0])
+     expects(:search_location).with('keyword', 0, 'sss')
+     search('keyword')
+   end
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,4 @@
+ require 'rubygems'
+ require 'test/spec'
+ require 'mocha'
+ require 'crags'
metadata ADDED
@@ -0,0 +1,96 @@
+ --- !ruby/object:Gem::Specification
+ name: gotascii-crags
+ version: !ruby/object:Gem::Version
+   version: 1.0.2
+ platform: ruby
+ authors:
+ - Justin Marney
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2009-01-30 00:00:00 -08:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: bones
+   version_requirement:
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 2.1.1
+     version:
+ description: A library to help search across multiple craigslist locations.
+ email: gotascii@gmail.com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files:
+ - History.txt
+ - README.txt
+ - lib/js/client.html
+ files:
+ - .gitignore
+ - History.txt
+ - Manifest.txt
+ - README.txt
+ - Rakefile
+ - crags.gemspec
+ - lib/crags.rb
+ - lib/crags/fetch.rb
+ - lib/crags/proxy.rb
+ - lib/crags/runner.rb
+ - lib/crags/searcher.rb
+ - lib/js/client.html
+ - tasks/ann.rake
+ - tasks/bones.rake
+ - tasks/gem.rake
+ - tasks/git.rake
+ - tasks/manifest.rake
+ - tasks/notes.rake
+ - tasks/post_load.rake
+ - tasks/rdoc.rake
+ - tasks/rubyforge.rake
+ - tasks/setup.rb
+ - tasks/spec.rake
+ - tasks/svn.rake
+ - tasks/test.rake
+ - test/crags/fetch_test.rb
+ - test/crags/proxy_test.rb
+ - test/crags/runner_test.rb
+ - test/crags/searcher_test.rb
+ - test/test_helper.rb
+ has_rdoc: true
+ homepage: http://github.com/gotascii/crags
+ post_install_message:
+ rdoc_options:
+ - --main
+ - README.txt
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ requirements: []
+
+ rubyforge_project: crags
+ rubygems_version: 1.2.0
+ signing_key:
+ specification_version: 2
+ summary: A library to help search across multiple craigslist locations
+ test_files:
+ - test/crags/fetch_test.rb
+ - test/crags/proxy_test.rb
+ - test/crags/runner_test.rb
+ - test/crags/searcher_test.rb