gotascii-crags 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ pkg/
data/History.txt ADDED
File without changes
data/Manifest.txt ADDED
@@ -0,0 +1,17 @@
1
+ .gitignore
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ crags.gemspec
7
+ lib/crags.rb
8
+ lib/crags/fetch.rb
9
+ lib/crags/proxy.rb
10
+ lib/crags/runner.rb
11
+ lib/crags/searcher.rb
12
+ lib/js/client.html
13
+ test/crags/fetch_test.rb
14
+ test/crags/proxy_test.rb
15
+ test/crags/runner_test.rb
16
+ test/crags/searcher_test.rb
17
+ test/test_helper.rb
data/README.txt ADDED
@@ -0,0 +1,56 @@
1
+ craigs
2
+ by Justin Marney
3
+ http://github.com/gotascii/crags
4
+
5
+ == DESCRIPTION:
6
+
7
+ A library to help search across multiple craigslist locations.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ Allows you to search across multiple craigslist locations. Automatically
12
+ delays multiple searches so craigslist doesn't ban your ip.
13
+
14
+ == SYNOPSIS:
15
+
16
+ # searches all available craigslist sites for a keyword in "for sale".
17
+ search(keyword)
18
+
19
+ # search also takes a category
20
+ search(keyword, 'sss')
21
+
22
+ There are a bunch of helpful api methods, check the tests for more info.
23
+
24
+ == REQUIREMENTS:
25
+
26
+ hpricot
27
+ curb
28
+
29
+ == INSTALL:
30
+
31
+ sudo gem install gotascii-crags -s http://gems.github.com
32
+
33
+ == LICENSE:
34
+
35
+ (The MIT License)
36
+
37
+ Copyright (c) 2008 FIXME (different license?)
38
+
39
+ Permission is hereby granted, free of charge, to any person obtaining
40
+ a copy of this software and associated documentation files (the
41
+ 'Software'), to deal in the Software without restriction, including
42
+ without limitation the rights to use, copy, modify, merge, publish,
43
+ distribute, sublicense, and/or sell copies of the Software, and to
44
+ permit persons to whom the Software is furnished to do so, subject to
45
+ the following conditions:
46
+
47
+ The above copyright notice and this permission notice shall be
48
+ included in all copies or substantial portions of the Software.
49
+
50
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
51
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
52
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
53
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
54
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
55
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
56
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,20 @@
1
+ begin
2
+ require 'bones'
3
+ Bones.setup
4
+ rescue LoadError
5
+ load 'tasks/setup.rb'
6
+ end
7
+
8
+ ensure_in_path 'lib'
9
+ require 'crags'
10
+
11
+ task :default => 'test'
12
+
13
+ PROJ.name = 'crags'
14
+ PROJ.authors = 'Justin Marney'
15
+ PROJ.email = 'justin.marney@viget.com'
16
+ PROJ.url = 'http://github.com/vigetlabs/crags'
17
+ PROJ.version = Crags::VERSION
18
+ PROJ.rubyforge.name = 'crags'
19
+ PROJ.test.files = FileList['test/**/*_test.rb']
20
+ PROJ.spec.opts << '--color'
data/crags.gemspec ADDED
@@ -0,0 +1,33 @@
1
# Gemspec for crags 1.0.2. Field values mirror the Bones-generated
# specification for this release; dependency wiring branches on the
# installed RubyGems version (legacy, circa 2009 RubyGems API).
Gem::Specification.new do |s|
  s.name = 'crags'
  s.version = '1.0.2'

  s.required_rubygems_version = Gem::Requirement.new('>= 0') if s.respond_to? :required_rubygems_version=
  s.authors = ['Justin Marney']
  s.date = '2009-01-30'
  s.description = 'A library to help search across multiple craigslist locations.'
  s.email = 'gotascii@gmail.com'
  s.extra_rdoc_files = ['History.txt', 'README.txt', 'lib/js/client.html']
  s.files = [
    '.gitignore', 'History.txt', 'Manifest.txt', 'README.txt', 'Rakefile',
    'crags.gemspec', 'lib/crags.rb', 'lib/crags/fetch.rb',
    'lib/crags/proxy.rb', 'lib/crags/runner.rb', 'lib/crags/searcher.rb',
    'lib/js/client.html', 'tasks/ann.rake', 'tasks/bones.rake',
    'tasks/gem.rake', 'tasks/git.rake', 'tasks/manifest.rake',
    'tasks/notes.rake', 'tasks/post_load.rake', 'tasks/rdoc.rake',
    'tasks/rubyforge.rake', 'tasks/setup.rb', 'tasks/spec.rake',
    'tasks/svn.rake', 'tasks/test.rake', 'test/crags/fetch_test.rb',
    'test/crags/proxy_test.rb', 'test/crags/runner_test.rb',
    'test/crags/searcher_test.rb', 'test/test_helper.rb'
  ]
  s.has_rdoc = true
  s.homepage = 'http://github.com/gotascii/crags'
  s.rdoc_options = ['--main', 'README.txt']
  s.require_paths = ['lib']
  s.rubyforge_project = 'crags'
  s.rubygems_version = '1.3.1'
  s.summary = 'A library to help search across multiple craigslist locations'
  s.test_files = [
    'test/crags/fetch_test.rb', 'test/crags/proxy_test.rb',
    'test/crags/runner_test.rb', 'test/crags/searcher_test.rb'
  ]

  if s.respond_to? :specification_version then
    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
    s.specification_version = 2

    # RubyGems >= 1.2 distinguishes development dependencies from
    # runtime dependencies; older versions only know add_dependency.
    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency('bones', ['>= 2.1.1'])
    else
      s.add_dependency('bones', ['>= 2.1.1'])
    end
  else
    s.add_dependency('bones', ['>= 2.1.1'])
  end
end
@@ -0,0 +1,11 @@
1
module Crags
  # Thin HTTP layer: downloads a page with curb and optionally wraps
  # the response body in an Hpricot document.
  module Fetch
    # Fetch +url+ and parse the response body into an Hpricot document.
    def fetch_doc(url)
      html = fetch_html(url)
      Hpricot.parse(html)
    end

    # Perform an HTTP GET for +url+ and return the raw body string.
    def fetch_html(url)
      response = Curl::Easy.perform(url)
      response.body_str
    end
  end
end
@@ -0,0 +1,21 @@
1
module Crags
  # Scrapes public proxy-list pages for candidate proxy IP addresses.
  module Proxy
    include Fetch

    # Pages known to publish free proxy lists.
    def lists
      ["http://www.proxy4free.com/page1.html","http://www.proxy4free.com/page3.html"]
    end

    # Download the raw HTML of every page in #lists.
    def fetch_lists
      lists.map { |page_url| fetch_html(page_url) }
    end

    # Extract every dotted-quad-shaped token from +text+.
    # NOTE(review): the pattern does not validate octet ranges, so
    # strings such as "78.900.42.32" also match — confirm that is intended.
    def scan(text)
      text.scan(/\d+\.\d+\.\d+\.\d+/)
    end

    # All unique IP strings found across every proxy-list page.
    def proxies
      scanned = fetch_lists.map { |page| scan(page) }
      scanned.flatten.uniq
    end
  end
end
@@ -0,0 +1,10 @@
1
module Crags
  # Searcher variant that reports progress on stdout while it works
  # through each craigslist location.
  class Runner
    include Searcher

    # Announce the location being searched, then defer to
    # Searcher#search_location for the actual work (any given block is
    # still forwarded along with the explicit arguments).
    def search_location(keyword, loc, category = 'sss')
      puts "Searching #{loc}..."
      super(keyword, loc, category)
    end
  end
end
@@ -0,0 +1,53 @@
1
require 'cgi'

module Crags
  # Core search API: discovers US craigslist locations and runs RSS
  # keyword searches against each one.
  module Searcher
    include Fetch

    # Document listing every US craigslist site.
    def location_doc
      fetch_doc("http://geo.craigslist.org/iso/us")
    end

    # Anchor elements for each location on the geo index page.
    def location_links
      location_doc.search("#list a")
    end

    # Array of craigslist site base URLs taken from the location links.
    def locations
      location_links.collect { |link| link["href"] }
    end

    # Hash of "for sale" category names to their href fragments,
    # scraped from the sfbay front page.
    def categories
      doc = fetch_doc("http://sfbay.craigslist.org/")
      links = doc.search("table[@summary=\"for sale\"] a")
      categories = {}
      links.each do |link|
        categories[link.inner_html] = link["href"]
      end
      categories
    end

    # Search every known location for +keyword+ within +category+,
    # returning the flattened list of result links. Sleeps 1-3 seconds
    # between locations so craigslist doesn't ban the client IP.
    # Yields each link as it is found when a block is given.
    def search(keyword, category = 'sss', &block)
      locations.collect do |loc|
        sleep(1 + rand(3))
        search_location(keyword, loc, category, &block)
      end.flatten
    end

    # All <item> elements from an RSS result document.
    def items(doc)
      doc.search("item")
    end

    # Search a single location's RSS feed. The keyword is URL-encoded
    # (fix: previously interpolated raw, so multi-word or
    # special-character queries produced malformed URLs); single-token
    # keywords are unchanged by the escape.
    def search_location(keyword, loc, category = 'sss', &block)
      doc = fetch_doc("#{loc}search/#{category}?query=#{CGI.escape(keyword)}&format=rss")
      items(doc).collect do |item|
        link = create_link(item)
        yield link if block_given?
        link
      end
    end

    # Build an HTML anchor for an RSS <item> element, using its
    # rdf:about attribute as the href and its <title> text as the label.
    def create_link(item)
      link = item["rdf:about"]
      title = item.at("title").inner_text
      "<a href=\"#{link}\">#{title}</a>"
    end
  end
end
data/lib/crags.rb ADDED
@@ -0,0 +1,26 @@
1
require 'rubygems'
require 'curb'
require 'hpricot'

# Root namespace for the crags library: multi-location craigslist search.
module Crags
  # Gem version string.
  VERSION = '1.0.2'
  # Absolute path of the lib directory, with a trailing separator.
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
  # Absolute path of the project root, with a trailing separator.
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR

  # Returns the library version string.
  def self.version
    VERSION
  end

  # Join +args+ onto the lib directory; with no args, returns the
  # lib directory path itself.
  def self.libpath( *args )
    return LIBPATH if args.empty?
    ::File.join(LIBPATH, *args)
  end

  # Join +args+ onto the project root; with no args, returns the
  # project root path itself.
  def self.path( *args )
    return PATH if args.empty?
    ::File.join(PATH, *args)
  end
end

require 'crags/fetch'
require 'crags/proxy'
require 'crags/searcher'
require 'crags/runner'
@@ -0,0 +1,81 @@
1
<!-- Proof-of-concept browser client: loads a craigslist RSS feed into a
     dynamically created hidden iframe. Uses legacy (circa IE5/NS6-era)
     cross-browser iframe-creation techniques throughout; kept verbatim. -->
<html>
<head>
<script language="javascript">
var IFrameObj; // our IFrame object
var IFrameDoc; // document object of the IFrame, resolved per-browser below
function callToServer() {
  // Bail out on ancient DOM-less browsers.
  if (!document.createElement) {return true};
  var URL = 'http://washingtondc.craigslist.org/bik/index.rss';
  if (!IFrameObj && document.createElement) {
    // create the IFrame and assign a reference to the
    // object to our global variable IFrameObj.
    // this will only happen the first time
    // callToServer() is called
    try {
      var tempIFrame=document.createElement('iframe');
      tempIFrame.setAttribute('id','RSIFrame');
      // Zero-size, borderless: the frame stays invisible to the user.
      tempIFrame.style.border='0px';
      tempIFrame.style.width='0px';
      tempIFrame.style.height='0px';
      IFrameObj = document.body.appendChild(tempIFrame);

      if (document.frames) {
        // this is for IE5 Mac, because it will only
        // allow access to the document object
        // of the IFrame if we access it through
        // the document.frames array
        IFrameObj = document.frames['RSIFrame'];
      }
    } catch(exception) {
      // This is for IE5 PC, which does not allow dynamic creation
      // and manipulation of an iframe object. Instead, we'll fake
      // it up by creating our own objects.
      iframeHTML='\<iframe id="RSIFrame" style="';
      iframeHTML+='border:0px;';
      iframeHTML+='width:0px;';
      iframeHTML+='height:0px;';
      iframeHTML+='"><\/iframe>';
      document.body.innerHTML+=iframeHTML;
      // Hand-rolled stand-in exposing only document.location.replace,
      // which is the single API used further down.
      IFrameObj = new Object();
      IFrameObj.document = new Object();
      IFrameObj.document.location = new Object();
      IFrameObj.document.location.iframe = document.getElementById('RSIFrame');
      IFrameObj.document.location.replace = function(location) {
        this.iframe.src = location;
      }
    }
  }

  if (navigator.userAgent.indexOf('Gecko') !=-1 && !IFrameObj.contentDocument) {
    // we have to give NS6 a fraction of a second
    // to recognize the new IFrame
    setTimeout('callToServer()',10);
    return false;
  }

  // Resolve the frame's document object, trying each browser family's
  // accessor in turn.
  if (IFrameObj.contentDocument) {
    // For NS6
    IFrameDoc = IFrameObj.contentDocument;
  } else if (IFrameObj.contentWindow) {
    // For IE5.5 and IE6
    IFrameDoc = IFrameObj.contentWindow.document;
  } else if (IFrameObj.document) {
    // For IE5
    IFrameDoc = IFrameObj.document;
  } else {
    return true;
  }

  // Navigate the hidden frame to the feed URL (replace avoids adding a
  // history entry).
  IFrameDoc.location.replace(URL);
  return false;
}

</script>
</head>
<body>
<script>
// NOTE(review): the alert fires immediately after callToServer() returns,
// before the frame has loaded, and IFrameDoc is a document object which
// typically has no innerHTML property — likely alerts "undefined"; verify.
callToServer();
alert(IFrameDoc.innerHTML);
</script>
</body>
</html>
@@ -0,0 +1,20 @@
1
require '../test_helper'

# Specs for Crags::Fetch (test/spec + mocha). The module is mixed into
# the spec context so its methods can be called and stubbed directly;
# no spec touches the network.
context "Fetch" do
  setup do
    extend Crags::Fetch
  end

  specify "fetch doc should hpricot fetched html" do
    # fetch_doc should delegate to fetch_html and hand the result to Hpricot.
    stubs(:fetch_html).with("url").returns("html")
    Hpricot.expects(:parse).with("html").returns("doc")
    fetch_doc("url").should == "doc"
  end

  specify "fetch html should curl a url" do
    # fetch_html should return the curb response's body string.
    curb = stub(:body_str => "uhh")
    Curl::Easy.expects(:perform).with("url").returns(curb)
    fetch_html("url").should == "uhh"
  end
end
20
+
@@ -0,0 +1,24 @@
1
require '../test_helper'

# Specs for Crags::Proxy (test/spec + mocha). The module is mixed into
# the spec context; fetch_html is stubbed so nothing hits the network.
context "Proxy" do
  setup do
    extend Crags::Proxy
  end

  specify "lists should return a list of proxy list websites" do
    # Fix: kept in sync with Crags::Proxy#lists, which returns both
    # proxy4free pages; the old expectation listed only page1 and failed.
    lists.should == ["http://www.proxy4free.com/page1.html", "http://www.proxy4free.com/page3.html"]
  end

  specify "fetch lists should fetch html for each site in lists" do
    stubs(:lists).returns(["1", "2"])
    expects(:fetch_html).with("1").returns("html_1")
    expects(:fetch_html).with("2").returns("html_2")
    fetch_lists.should == ["html_1", "html_2"]
  end

  specify "scan should return all ips in a text blizoc" do
    # The regex matches any dotted quad, including out-of-range octets.
    text = "192.168.1.2 omg dude!! wtf.f.f.asdasd9.8.9 78.900.42.32"
    scan(text).should == ["192.168.1.2", "78.900.42.32"]
  end
end
24
+
@@ -0,0 +1,26 @@
1
require '../test_helper'

# Specs for Crags::Runner (test/spec + mocha). These only cover the
# progress-reporting wrapper, so the network fetch and item parsing
# are stubbed out on the instance.
context "Runner" do
  setup do
    @runner = Crags::Runner.new
    @runner.stubs(:fetch_doc)
    @runner.stubs(:items).returns([])
  end

  specify "runner should include searcher" do
    Crags::Runner.ancestors.should.include Crags::Searcher
  end

  specify "search location should puts message with loc" do
    # The progress message must mention the location being searched.
    @runner.expects(:puts).with { |val| val =~ /location/ }
    @runner.search_location("", "location", "category")
  end

  specify "search location should take a category" do
    @runner.search_location("", "location", "category")
  end

  specify "search location should have default category sss" do
    @runner.search_location("", "location")
  end
end
@@ -0,0 +1,123 @@
1
require '../test_helper'

# Specs for Crags::Searcher (test/spec + mocha). The module is mixed
# into the spec context; fetch_doc and sleep are stubbed so no spec
# touches the network or actually waits.
context "Searcher with stubbed fetch doc" do
  setup do
    extend Crags::Searcher
    stubs(:sleep)
    stubs(:fetch_doc)
  end

  specify "location doc should fetch doc at location url" do
    expects(:fetch_doc).with("http://geo.craigslist.org/iso/us").returns("doc")
    location_doc.should == "doc"
  end

  specify "location links should get all a tags from div with id list" do
    doc = mock { expects(:search).with("#list a").returns("links") }
    stubs(:location_doc).returns(doc)
    location_links.should == "links"
  end

  specify "locations should return array of urls using a location link's href" do
    links = []
    2.times do |i|
      links << mock {|m| m.expects(:[]).with("href").returns("url#{i}") }
    end
    stubs(:location_links).returns(links)
    locations.should == ["url0", "url1"]
  end

  specify "search should search location for each location with keyword and return list" do
    locations = ["url0", "url1"]

    locations.each do |loc|
      expects(:search_location).with("omg", loc, 'sss').returns(["1#{loc}", "2#{loc}"])
    end

    stubs(:locations).returns(locations)
    # Per-location result arrays are flattened into one list.
    search("omg").should == ["1url0", "2url0", "1url1", "2url1"]
  end

  specify "search should call sleep for each location" do
    expects(:sleep).times(2)
    stubs(:locations).returns([1,2])
    stubs(:search_location)
    search("")
  end

  specify "search location should fetch doc for search url" do
    expects(:fetch_doc).with("urlsearch/sss?query=keyword&format=rss")
    stubs(:items).returns([])
    search_location("keyword", "url")
  end

  specify "search location should create link with each item in doc items and return list" do
    items = [1,2,3]
    expects(:items).returns(items)
    items.each do |i|
      expects(:create_link).with(i).returns("omg#{i}")
    end
    search_location("keyword", "url").should == ['omg1','omg2','omg3']
  end

  specify "create link should return an a href based on item element" do
    inner_text = mock(:inner_text=>"text")
    item = mock do |l|
      expects(:[]).with("rdf:about").returns("link")
      expects(:at).with("title").returns(inner_text)
    end

    create_link(item).should == "<a href=\"link\">text</a>"
  end

  specify "items should get all item elements from doc" do
    doc = mock { expects(:search).with("item").returns(1) }
    items(doc).should == 1
  end

  specify "categories should fetch doc the main sfbay page" do
    doc = stub(:search => [])
    expects(:fetch_doc).with("http://sfbay.craigslist.org/").returns(doc)
    categories
  end

  specify "categories should search for all links in the table with property summary equal to for sale" do
    doc = mock { expects(:search).with("table[@summary=\"for sale\"] a").returns([]) }
    stubs(:fetch_doc).returns(doc)
    categories
  end

  specify "categories should return a hash with link inner html keys and link href values" do
    link = stub(:inner_html => "inner_html") do
      stubs(:[]).with("href").returns("href")
    end

    doc = stub(:search => [link, link])
    stubs(:fetch_doc).returns(doc)
    # Fix: the expectation literal previously repeated the same key
    # twice ({'inner_html' => 'href', 'inner_html' => 'href'}), which
    # Ruby warns about and collapses to a single pair anyway — both
    # stubbed links produce the identical pair.
    categories.should == {'inner_html' => 'href'}
  end

  specify "search location should accept a category parameter" do
    expects(:fetch_doc).with("locsearch/scram?query=keyword&format=rss")
    stubs(:items).returns([])
    search_location('keyword', 'loc', 'scram')
  end

  specify "search location default category is sss" do
    expects(:fetch_doc).with("locsearch/sss?query=keyword&format=rss")
    stubs(:items).returns([])
    search_location('keyword', 'loc')
  end

  specify "search should pass parameter to search location" do
    stubs(:locations).returns([0])
    expects(:search_location).with('keyword', 0, 'chum')
    search('keyword', 'chum')
  end

  specify "search should have default category of sss" do
    stubs(:locations).returns([0])
    expects(:search_location).with('keyword', 0, 'sss')
    search('keyword')
  end
end
@@ -0,0 +1,4 @@
1
# Shared test bootstrap: loads the test/spec framework, mocha for
# mocking/stubbing, and the crags library under test.
# NOTE(review): specs load this via require '../test_helper', which
# resolves relative to the working directory on Ruby 1.8 — the suite
# presumably must be run from test/crags; confirm.
require 'rubygems'
require 'test/spec'
require 'mocha'
require 'crags'
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gotascii-crags
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Justin Marney
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-30 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: bones
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.1.1
23
+ version:
24
+ description: A library to help search across multiple craigslist locations.
25
+ email: gotascii@gmail.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files:
31
+ - History.txt
32
+ - README.txt
33
+ - lib/js/client.html
34
+ files:
35
+ - .gitignore
36
+ - History.txt
37
+ - Manifest.txt
38
+ - README.txt
39
+ - Rakefile
40
+ - crags.gemspec
41
+ - lib/crags.rb
42
+ - lib/crags/fetch.rb
43
+ - lib/crags/proxy.rb
44
+ - lib/crags/runner.rb
45
+ - lib/crags/searcher.rb
46
+ - lib/js/client.html
47
+ - tasks/ann.rake
48
+ - tasks/bones.rake
49
+ - tasks/gem.rake
50
+ - tasks/git.rake
51
+ - tasks/manifest.rake
52
+ - tasks/notes.rake
53
+ - tasks/post_load.rake
54
+ - tasks/rdoc.rake
55
+ - tasks/rubyforge.rake
56
+ - tasks/setup.rb
57
+ - tasks/spec.rake
58
+ - tasks/svn.rake
59
+ - tasks/test.rake
60
+ - test/crags/fetch_test.rb
61
+ - test/crags/proxy_test.rb
62
+ - test/crags/runner_test.rb
63
+ - test/crags/searcher_test.rb
64
+ - test/test_helper.rb
65
+ has_rdoc: true
66
+ homepage: http://github.com/gotascii/crags
67
+ post_install_message:
68
+ rdoc_options:
69
+ - --main
70
+ - README.txt
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: "0"
78
+ version:
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: "0"
84
+ version:
85
+ requirements: []
86
+
87
+ rubyforge_project: crags
88
+ rubygems_version: 1.2.0
89
+ signing_key:
90
+ specification_version: 2
91
+ summary: A library to help search across multiple craigslist locations
92
+ test_files:
93
+ - test/crags/fetch_test.rb
94
+ - test/crags/proxy_test.rb
95
+ - test/crags/runner_test.rb
96
+ - test/crags/searcher_test.rb