fizx-rwget 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/../lib/rwget"
3
+
4
require "tempfile" # Tempfile is used below; do not rely on rwget loading it transitively.

# Checks that RWGet::Links#urls extracts the anchors of an HTML document
# and returns them resolved against the supplied base URI.
class LinksTest < Test::Unit::TestCase
  # Writes a tiny HTML page containing a single relative link ("foo") into
  # a temp file, closes it, and builds the link extractor under test.
  def setup
    @t = Tempfile.new("testings")
    @t.puts <<-STR
<html><body>fdssdfsad
<a href="foo">boo</a></body></html>
    STR
    @t.close
    @links = RWGet::Links.new
  end

  # Removes the temp file created in setup so repeated runs leave nothing
  # behind. Guarded in case setup failed before @t was assigned.
  def teardown
    @t.unlink if @t
  end

  # The relative href "foo" is expected to resolve against
  # http://yahoo.com/foo/bar to http://yahoo.com/foo/foo.
  def test_links
    base = URI.parse("http://yahoo.com/foo/bar")
    expected = [URI.parse("http://yahoo.com/foo/foo")]
    assert_equal expected, @links.urls(base, @t)
  end
end
@@ -0,0 +1,14 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/server"
3
+ require File.dirname(__FILE__) + "/../lib/rwget"
4
+
5
# Exercises RWGet::Queue#put and RWGet::Queue#get.
#
# NOTE(review): the class is called FetchTest although it only touches
# RWGet::Queue -- presumably a copy/paste leftover; confirm before renaming.
class FetchTest < Test::Unit::TestCase

  # Expects a stored key/value pair to come back from #get as a two-element
  # array, and the next #get on the drained queue to return nil.
  def test_put_get
    q = RWGet::Queue.new
    q.put("key", 1)

    first = q.get
    assert_equal(["key", 1], first)

    second = q.get
    assert_nil second
  end

end
@@ -0,0 +1,28 @@
1
+ require "rubygems"
2
+ require "mongrel"
3
+
4
+
5
# Mongrel handler that answers every request with the same small HTML page
# (one "../" link and one "d/" link) followed by 80000 spaces of padding.
class SimpleHandler < Mongrel::HttpHandler
  # Sends a 200 text/html response: first the fixed page, then the padding.
  # The request argument is not inspected.
  def process(request, response)
    response.start(200) do |headers, body|
      headers["Content-Type"] = "text/html"

      page = <<-HTML
<html><head></head><body>
<a href="../">shallower</a>
<a href="d/">deeper</a>
</body></html>
      HTML
      body.write page

      padding = " " * 80000
      body.write(padding)
    end
  end
end
19
+
20
# Boots a throwaway Mongrel server serving SimpleHandler at "/".
#
# The server is bound to the loopback address only: it is addressed through
# 127.0.0.1 below, so there is no reason to expose it on every interface.
# Host and port are defined once so the bind address and $webroot cannot
# drift apart.
host = "127.0.0.1"
port = "5491"

server = Mongrel::HttpServer.new(host, port)
server.register("/", SimpleHandler.new)

# Run the server from a background thread so that requiring this file
# returns instead of blocking on the join.
Thread.new do
  server.run.join
end
# Crude fixed pause to give the background thread time to start.
sleep 1

# Base URL of the server started above; presumably read by the tests that
# require this file -- verify against callers.
$webroot = "http://#{host}:#{port}"
@@ -0,0 +1,18 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/../lib/rwget"
3
+
4
# Checks how many URLs RWGet::SitemapLinks#urls returns for three fixtures:
# a sitemap index, a gzipped sitemap, and a plain HTML page.
class SitemapLinksTest < Test::Unit::TestCase
  # Opens the three fixture files and builds the extractor under test.
  def setup
    @links = RWGet::SitemapLinks.new
    @base = URI.parse("http://eventbrite.com")
    @index = open_fixture("sitemap_index.xml")
    @individual = open_fixture("events00.xml.gz")
    @html = open_fixture("yelp.html")
  end

  # Closes every fixture handle opened in setup so file descriptors are not
  # leaked. Guarded because setup may have failed part-way, and because the
  # code under test might already have closed a handle.
  def teardown
    [@index, @individual, @html].each do |io|
      io.close if io && !io.closed?
    end
  end

  # Expected counts: 18 entries from the index, 39998 from the gzipped
  # sitemap, and none from the HTML page.
  def test_links
    assert_equal 18, @links.urls(@base, @index).length
    assert_equal 39998, @links.urls(@base, @individual).length
    assert_equal 0, @links.urls(@base, @html).length
  end

  private

  # Opens the named file from the fixtures directory next to this test.
  def open_fixture(name)
    File.open(File.dirname(__FILE__) + "/fixtures/" + name)
  end
end
@@ -0,0 +1,28 @@
1
+ require "test/unit"
2
+ require "fileutils"
3
+ require File.dirname(__FILE__) + "/../lib/rwget"
4
+
5
require "tempfile" # Tempfile is used below; do not rely on rwget loading it transitively.

# Checks that RWGet::Store#put writes content beneath the configured root.
class StoreTest < Test::Unit::TestCase
  include FileUtils

  # Creates a scratch directory next to this file and points a fresh store
  # at it.
  def setup
    @tmp = File.dirname(__FILE__) + "/tmp"
    mkdir_p @tmp
    @store = RWGet::Store.new
    @store.root = @tmp
  end

  # Deletes the scratch directory and everything stored in it.
  def teardown
    rm_rf @tmp
  end

  # Putting a temp file under the key "foo/bar" is expected to produce
  # <root>/foo/bar/index.html containing the temp file's content.
  def test_put
    file = Tempfile.new("testing")
    file.puts "hello"
    file.close
    @store.put("foo/bar", file)
    new_path = File.join(@tmp, "foo/bar", "index.html")
    # File.exists? was deprecated and removed in Ruby 3.2; File.exist? is
    # the supported spelling.
    assert File.exist?(new_path), "expected #{new_path} to exist"
    assert_match(/hello/, File.read(new_path))
  ensure
    # Remove the temp file if it is still on disk after the put.
    file.unlink if file
  end
end
metadata ADDED
@@ -0,0 +1,137 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fizx-rwget
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Kyle Maxwell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-06-19 00:00:00 -07:00
13
+ default_executable: rwget
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hpricot
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.0.0
34
+ - - <
35
+ - !ruby/object:Gem::Version
36
+ version: "0.7"
37
+ version:
38
+ - !ruby/object:Gem::Dependency
39
+ name: fizx-robots
40
+ type: :runtime
41
+ version_requirement:
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 0.3.1
47
+ version:
48
+ - !ruby/object:Gem::Dependency
49
+ name: bloomfilter
50
+ type: :runtime
51
+ version_requirement:
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">"
55
+ - !ruby/object:Gem::Version
56
+ version: 0.0.0
57
+ version:
58
+ - !ruby/object:Gem::Dependency
59
+ name: libxml-ruby
60
+ type: :runtime
61
+ version_requirement:
62
+ version_requirements: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">"
65
+ - !ruby/object:Gem::Version
66
+ version: "0.9"
67
+ version:
68
+ description:
69
+ email: kyle@kylemaxwell.com
70
+ executables:
71
+ - rwget
72
+ extensions: []
73
+
74
+ extra_rdoc_files:
75
+ - README.markdown
76
+ files:
77
+ - .document
78
+ - .gitignore
79
+ - README.markdown
80
+ - Rakefile
81
+ - VERSION
82
+ - bin/rwget
83
+ - lib/rwget.rb
84
+ - lib/rwget/controller.rb
85
+ - lib/rwget/dupes.rb
86
+ - lib/rwget/fetch.rb
87
+ - lib/rwget/links.rb
88
+ - lib/rwget/queue.rb
89
+ - lib/rwget/rwget_option_parser.rb
90
+ - lib/rwget/sitemap_links.rb
91
+ - lib/rwget/store.rb
92
+ - test/controller_test.rb
93
+ - test/dupes_test.rb
94
+ - test/fetch_test.rb
95
+ - test/fixtures/events00.xml.gz
96
+ - test/fixtures/sitemap_index.xml
97
+ - test/fixtures/yelp.html
98
+ - test/links_test.rb
99
+ - test/queue_test.rb
100
+ - test/server.rb
101
+ - test/sitemap_links_test.rb
102
+ - test/store_test.rb
103
+ has_rdoc: true
104
+ homepage: http://github.com/fizx/rwget
105
+ post_install_message:
106
+ rdoc_options:
107
+ - --charset=UTF-8
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: "0"
115
+ version:
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: "0"
121
+ version:
122
+ requirements: []
123
+
124
+ rubyforge_project:
125
+ rubygems_version: 1.2.0
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: Ruby port of wget, emphasis on recursive/crawler
129
+ test_files:
130
+ - test/controller_test.rb
131
+ - test/dupes_test.rb
132
+ - test/fetch_test.rb
133
+ - test/links_test.rb
134
+ - test/queue_test.rb
135
+ - test/server.rb
136
+ - test/sitemap_links_test.rb
137
+ - test/store_test.rb