fizx-rwget 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/../lib/rwget"
3
+
4
# Tempfile is used in setup; load it explicitly instead of relying on
# lib/rwget (or another test file) having required it first.
require "tempfile"

# Checks that RWGet::Links#urls resolves a relative link found in an HTML
# document against the URL the document was fetched from.
class LinksTest < Test::Unit::TestCase
  # Writes a tiny HTML page containing a single relative link ("foo") to a
  # temp file and builds the link extractor under test.
  def setup
    @t = Tempfile.new("testings")
    @t.puts <<-STR
      <html><body>fdssdfsad
      <a href="foo">boo</a></body></html>
    STR
    # Close so the content is flushed to disk; the closed Tempfile object is
    # what gets handed to Links#urls below.
    @t.close
    @links = RWGet::Links.new
  end

  # Removes the temp file so repeated runs do not leave files behind.
  def teardown
    @t.close! if @t
  end

  # "foo" relative to http://yahoo.com/foo/bar must resolve to
  # http://yahoo.com/foo/foo.
  def test_links
    base = URI.parse("http://yahoo.com/foo/bar")
    expected = [URI.parse("http://yahoo.com/foo/foo")]
    assert_equal expected, @links.urls(base, @t)
  end
end
@@ -0,0 +1,14 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/server"
3
+ require File.dirname(__FILE__) + "/../lib/rwget"
4
+
5
# Covers the basic put/get behaviour of RWGet::Queue.
# NOTE(review): the class is called FetchTest although it only exercises
# RWGet::Queue — presumably copied from the fetch test; confirm there is no
# clash with another FetchTest before renaming.
class FetchTest < Test::Unit::TestCase
  # One stored pair is returned as [key, value]; the next get returns nil.
  def test_put_get
    q = RWGet::Queue.new
    q.put("key", 1)

    stored_pair = ["key", 1]
    assert_equal(stored_pair, q.get)
    assert_nil(q.get)
  end
end
@@ -0,0 +1,28 @@
1
+ require "rubygems"
2
+ require "mongrel"
3
+
4
+
5
# Mongrel handler used as a crawl fixture: every request gets the same small
# HTML page (one link pointing up a level, one pointing down) followed by
# 80000 spaces of padding.
class SimpleHandler < Mongrel::HttpHandler
  # Sends a 200 text/html response. The request argument is not inspected.
  def process(request, response)
    response.start(200) do |headers, body|
      headers["Content-Type"] = "text/html"
      page = <<-HTML
        <html><head></head><body>
        <a href="../">shallower</a>
        <a href="d/">deeper</a>
        </body></html>
      HTML
      body.write page
      # Padding written after the markup to make the response large.
      padding = " " * 80000
      body.write(padding)
    end
  end
end
19
+
20
# Boot the fixture HTTP server in a background thread.
#
# The server listens on the loopback interface only: clients reach it through
# $webroot (127.0.0.1), so binding to 0.0.0.0 would needlessly expose a test
# server on every network interface of the machine.
host = "127.0.0.1"
port = "5491"

h = Mongrel::HttpServer.new(host, port)
h.register("/", SimpleHandler.new)

# Join the value returned by h.run inside a separate thread so that requiring
# this file does not block the test process.
Thread.new do
  h.run.join
end
# Give the server a moment to start accepting connections.
sleep 1

# Base URL of the fixture server, built from the same host and port the
# server was bound to so the two cannot drift apart.
$webroot = "http://#{host}:#{port}"
@@ -0,0 +1,18 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/../lib/rwget"
3
+
4
# Checks how many URLs RWGet::SitemapLinks#urls extracts from a sitemap index,
# a gzipped sitemap and a plain HTML page.
class SitemapLinksTest < Test::Unit::TestCase
  # Builds the extractor and opens the three fixture files.
  def setup
    @links = RWGet::SitemapLinks.new
    @base = URI.parse("http://eventbrite.com")
    @index = open_fixture("sitemap_index.xml")
    @individual = open_fixture("events00.xml.gz")
    @html = open_fixture("yelp.html")
  end

  # Closes the fixture handles opened in setup so they do not leak across
  # test runs. The closed? guard covers the case where the code under test
  # already closed the handle it was given.
  def teardown
    [@index, @individual, @html].each do |io|
      io.close if io && !io.closed?
    end
  end

  # Expected counts: 18 entries in the index, 39998 in the gzipped sitemap,
  # and none for a document that is not a sitemap.
  def test_links
    assert_equal 18, @links.urls(@base, @index).length
    assert_equal 39998, @links.urls(@base, @individual).length
    assert_equal 0, @links.urls(@base, @html).length
  end

  private

  # Opens a file from the fixtures directory that sits next to this test.
  def open_fixture(name)
    File.open(File.join(File.dirname(__FILE__), "fixtures", name))
  end
end
@@ -0,0 +1,28 @@
1
+ require "test/unit"
2
+ require "fileutils"
3
+ require File.dirname(__FILE__) + "/../lib/rwget"
4
+
5
# Tempfile is used in test_put; load it explicitly instead of relying on
# lib/rwget (or another test file) having required it first.
require "tempfile"

# Checks that RWGet::Store#put writes a fetched file below the store root.
class StoreTest < Test::Unit::TestCase
  include FileUtils

  # Creates a scratch directory next to this test and points the store at it.
  def setup
    @tmp = File.dirname(__FILE__) + "/tmp"
    mkdir_p @tmp
    @store = RWGet::Store.new
    @store.root = @tmp
  end

  # Removes the scratch directory and everything stored in it.
  def teardown
    rm_rf @tmp
  end

  # Putting a file under the key "foo/bar" must produce
  # <root>/foo/bar/index.html with the original content.
  def test_put
    file = Tempfile.new("testing")
    file.puts "hello"
    file.close
    @store.put("foo/bar", file)
    new_path = File.join(@tmp, "foo/bar", "index.html")
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    assert File.exist?(new_path)
    assert_match(/hello/, File.read(new_path))
  ensure
    # Remove the temp file even when an assertion fails.
    file.close! if file
  end
end
metadata ADDED
@@ -0,0 +1,137 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fizx-rwget
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Kyle Maxwell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-06-19 00:00:00 -07:00
13
+ default_executable: rwget
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hpricot
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.0.0
34
+ - - <
35
+ - !ruby/object:Gem::Version
36
+ version: "0.7"
37
+ version:
38
+ - !ruby/object:Gem::Dependency
39
+ name: fizx-robots
40
+ type: :runtime
41
+ version_requirement:
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 0.3.1
47
+ version:
48
+ - !ruby/object:Gem::Dependency
49
+ name: bloomfilter
50
+ type: :runtime
51
+ version_requirement:
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">"
55
+ - !ruby/object:Gem::Version
56
+ version: 0.0.0
57
+ version:
58
+ - !ruby/object:Gem::Dependency
59
+ name: libxml-ruby
60
+ type: :runtime
61
+ version_requirement:
62
+ version_requirements: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">"
65
+ - !ruby/object:Gem::Version
66
+ version: "0.9"
67
+ version:
68
+ description:
69
+ email: kyle@kylemaxwell.com
70
+ executables:
71
+ - rwget
72
+ extensions: []
73
+
74
+ extra_rdoc_files:
75
+ - README.markdown
76
+ files:
77
+ - .document
78
+ - .gitignore
79
+ - README.markdown
80
+ - Rakefile
81
+ - VERSION
82
+ - bin/rwget
83
+ - lib/rwget.rb
84
+ - lib/rwget/controller.rb
85
+ - lib/rwget/dupes.rb
86
+ - lib/rwget/fetch.rb
87
+ - lib/rwget/links.rb
88
+ - lib/rwget/queue.rb
89
+ - lib/rwget/rwget_option_parser.rb
90
+ - lib/rwget/sitemap_links.rb
91
+ - lib/rwget/store.rb
92
+ - test/controller_test.rb
93
+ - test/dupes_test.rb
94
+ - test/fetch_test.rb
95
+ - test/fixtures/events00.xml.gz
96
+ - test/fixtures/sitemap_index.xml
97
+ - test/fixtures/yelp.html
98
+ - test/links_test.rb
99
+ - test/queue_test.rb
100
+ - test/server.rb
101
+ - test/sitemap_links_test.rb
102
+ - test/store_test.rb
103
+ has_rdoc: true
104
+ homepage: http://github.com/fizx/rwget
105
+ post_install_message:
106
+ rdoc_options:
107
+ - --charset=UTF-8
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: "0"
115
+ version:
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: "0"
121
+ version:
122
+ requirements: []
123
+
124
+ rubyforge_project:
125
+ rubygems_version: 1.2.0
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: Ruby port of wget, emphasis on recursive/crawler
129
+ test_files:
130
+ - test/controller_test.rb
131
+ - test/dupes_test.rb
132
+ - test/fetch_test.rb
133
+ - test/links_test.rb
134
+ - test/queue_test.rb
135
+ - test/server.rb
136
+ - test/sitemap_links_test.rb
137
+ - test/store_test.rb