rwget 0.0.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/../lib/rwget"
3
+
4
+ class LinksTest < Test::Unit::TestCase
5
+ def setup
6
+ @t = Tempfile.new("testings")
7
+ @t.puts <<-STR
8
+ <html><body>fdssdfsad
9
+ <a href="foo">boo</a></body></html>
10
+ STR
11
+ @t.close
12
+ @links = RWGet::Links.new
13
+ end
14
+
15
+ def teardown
16
+ end
17
+
18
+ def test_links
19
+ assert_equal [URI.parse("http://yahoo.com/foo/foo")], @links.urls(URI.parse("http://yahoo.com/foo/bar"), @t)
20
+ end
21
+
22
+ end
@@ -0,0 +1,14 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/server"
3
+ require File.dirname(__FILE__) + "/../lib/rwget"
4
+
5
+ class FetchTest < Test::Unit::TestCase
6
+
7
+ def test_put_get
8
+ queue = RWGet::Queue.new
9
+ queue.put("key", 1)
10
+ assert_equal(["key", 1], queue.get)
11
+ assert_nil queue.get
12
+ end
13
+
14
+ end
@@ -0,0 +1,28 @@
1
+ require "rubygems"
2
+ require "mongrel"
3
+
4
+
5
+ class SimpleHandler < Mongrel::HttpHandler
6
+ def process(request, response)
7
+ response.start(200) do |head,out|
8
+ head["Content-Type"] = "text/html"
9
+ out.write <<-HTML
10
+ <html><head></head><body>
11
+ <a href="../">shallower</a>
12
+ <a href="d/">deeper</a>
13
+ </body></html>
14
+ HTML
15
+ out.write(" " * 80000)
16
+ end
17
+ end
18
+ end
19
+
20
+ h = Mongrel::HttpServer.new("0.0.0.0", "5491")
21
+ h.register("/", SimpleHandler.new)
22
+
23
+ Thread.new do
24
+ h.run.join
25
+ end
26
+ sleep 1
27
+
28
+ $webroot = "http://127.0.0.1:5491"
@@ -0,0 +1,18 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__) + "/../lib/rwget"
3
+
4
+ class SitemapLinksTest < Test::Unit::TestCase
5
+ def setup
6
+ @links = RWGet::SitemapLinks.new
7
+ @base = URI.parse("http://eventbrite.com")
8
+ @index = File.open(File.dirname(__FILE__) + "/fixtures/sitemap_index.xml")
9
+ @individual = File.open(File.dirname(__FILE__) + "/fixtures/events00.xml.gz")
10
+ @html = File.open(File.dirname(__FILE__) + "/fixtures/yelp.html")
11
+ end
12
+
13
+ def test_links
14
+ assert_equal 18, @links.urls(@base, @index).length
15
+ assert_equal 39998, @links.urls(@base, @individual).length
16
+ assert_equal 0, @links.urls(@base, @html).length
17
+ end
18
+ end
@@ -0,0 +1,28 @@
1
+ require "test/unit"
2
+ require "fileutils"
3
+ require File.dirname(__FILE__) + "/../lib/rwget"
4
+
5
+ class StoreTest < Test::Unit::TestCase
6
+ include FileUtils
7
+
8
+ def setup
9
+ @tmp = File.dirname(__FILE__) + "/tmp"
10
+ mkdir_p @tmp
11
+ @store = RWGet::Store.new
12
+ @store.root = @tmp
13
+ end
14
+
15
+ def teardown
16
+ rm_rf @tmp
17
+ end
18
+
19
+ def test_put
20
+ file = Tempfile.new("testing")
21
+ file.puts "hello"
22
+ file.close
23
+ @store.put("foo/bar", file)
24
+ new_path = File.join(@tmp, "foo/bar", "index.html")
25
+ assert File.exists?(new_path)
26
+ assert File.read(new_path) =~ /hello/
27
+ end
28
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rwget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kyle Maxwell
@@ -9,27 +9,102 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-10 00:00:00 -07:00
13
- default_executable:
14
- dependencies: []
15
-
12
+ date: 2009-10-22 00:00:00 -07:00
13
+ default_executable: rwget
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hpricot
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.0.0
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: fizx-robots
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 0.3.1
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: igrigorik-bloomfilter
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">"
52
+ - !ruby/object:Gem::Version
53
+ version: 0.0.0
54
+ version:
55
+ - !ruby/object:Gem::Dependency
56
+ name: libxml-ruby
57
+ type: :runtime
58
+ version_requirement:
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">"
62
+ - !ruby/object:Gem::Version
63
+ version: "0.9"
64
+ version:
16
65
  description:
17
- email:
18
- executables: []
19
-
66
+ email: kyle@kylemaxwell.com
67
+ executables:
68
+ - rwget
20
69
  extensions: []
21
70
 
22
- extra_rdoc_files: []
23
-
24
- files: []
25
-
71
+ extra_rdoc_files:
72
+ - README.markdown
73
+ files:
74
+ - .document
75
+ - .gitignore
76
+ - README.markdown
77
+ - Rakefile
78
+ - VERSION
79
+ - bin/rwget
80
+ - lib/rwget.rb
81
+ - lib/rwget/controller.rb
82
+ - lib/rwget/dupes.rb
83
+ - lib/rwget/fetch.rb
84
+ - lib/rwget/links.rb
85
+ - lib/rwget/queue.rb
86
+ - lib/rwget/rwget_option_parser.rb
87
+ - lib/rwget/sitemap_links.rb
88
+ - lib/rwget/store.rb
89
+ - rwget.gemspec
90
+ - test/controller_test.rb
91
+ - test/dupes_test.rb
92
+ - test/fetch_test.rb
93
+ - test/fixtures/events00.xml.gz
94
+ - test/fixtures/sitemap_index.xml
95
+ - test/fixtures/yelp.html
96
+ - test/links_test.rb
97
+ - test/queue_test.rb
98
+ - test/server.rb
99
+ - test/sitemap_links_test.rb
100
+ - test/store_test.rb
26
101
  has_rdoc: true
27
- homepage:
102
+ homepage: http://github.com/fizx/rwget
28
103
  licenses: []
29
104
 
30
105
  post_install_message:
31
- rdoc_options: []
32
-
106
+ rdoc_options:
107
+ - --charset=UTF-8
33
108
  require_paths:
34
109
  - lib
35
110
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -47,9 +122,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
47
122
  requirements: []
48
123
 
49
124
  rubyforge_project:
50
- rubygems_version: 1.3.4
125
+ rubygems_version: 1.3.5
51
126
  signing_key:
52
127
  specification_version: 3
53
- summary: Placeholder for a gem to be migrated later
54
- test_files: []
55
-
128
+ summary: Ruby port of wget, emphasis on recursive/crawler
129
+ test_files:
130
+ - test/controller_test.rb
131
+ - test/dupes_test.rb
132
+ - test/fetch_test.rb
133
+ - test/links_test.rb
134
+ - test/queue_test.rb
135
+ - test/server.rb
136
+ - test/sitemap_links_test.rb
137
+ - test/store_test.rb