scrapes 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +22 -0
- data/README +123 -0
- data/demo/demo.rb +33 -0
- data/demo/pages/about.rb +32 -0
- data/demo/pages/main.rb +32 -0
- data/lib/scrapes.rb +41 -0
- data/lib/scrapes/cache.rb +110 -0
- data/lib/scrapes/cookbook.rb +53 -0
- data/lib/scrapes/cookies.rb +45 -0
- data/lib/scrapes/crawler.rb +97 -0
- data/lib/scrapes/hpricot.rb +110 -0
- data/lib/scrapes/initializer.rb +86 -0
- data/lib/scrapes/page.rb +319 -0
- data/lib/scrapes/rule_parser.rb +327 -0
- data/lib/scrapes/session.rb +155 -0
- data/lib/scrapes/to_proxy.rb +50 -0
- data/test/cache.rb +75 -0
- data/test/cookies.rb +34 -0
- data/test/crawler.rb +69 -0
- data/test/hpricot.rb +55 -0
- data/test/initializer.rb +54 -0
- data/test/lib/server.rb +63 -0
- data/test/page.rb +77 -0
- data/test/pages/foils.rb +61 -0
- data/test/pages/foils2.rb +38 -0
- data/test/pages/redhanded_entries.rb +36 -0
- data/test/pages/redhanded_main.rb +58 -0
- data/test/pages/rule_parser.rb +81 -0
- data/test/pages/simple.rb +21 -0
- data/test/public/foil72.html +10 -0
- data/test/public/foil73.html +9 -0
- data/test/public/foil74.html +11 -0
- data/test/public/foo.txt +1 -0
- data/test/public/index.html +20 -0
- data/test/public/redhanded.html +1208 -0
- data/test/public/rule_parser.html +21 -0
- data/test/public/simple.html +8 -0
- data/test/rule_parser.rb +151 -0
- data/test/session.rb +45 -0
- data/test/textcontent.rb +71 -0
- metadata +123 -0
@@ -0,0 +1,50 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
module Scrapes
  ################################################################################
  # Link pages together. Useful for when one page only contains links to the
  # next page.
  #
  # A ToProxy pairs a source (+from+) with a destination (+to+). Extracting
  # through the proxy runs the source's extraction and hands every link it
  # yields to <tt>session.page</tt> together with the destination.
  class ToProxy
    ################################################################################
    # from:: object responding to <tt>extract(data, uri, session)</tt> and
    #        yielding one link per call to the given block.
    # to::   value passed as the first argument to <tt>session.page</tt> for
    #        each yielded link.
    def initialize(from, to)
      @from, @to = from, to
    end

    ################################################################################
    # Run the source's extraction and forward each yielded link, plus the
    # caller's optional block, to <tt>session.page</tt>.
    #
    # Returns whatever <tt>@from.extract</tt> returns.
    def extract(data, uri, session, &block)
      @from.extract(data, uri, session) do |link|
        session.page(@to, link, &block)
      end
    end

    ################################################################################
    # Continue the string of connections: the returned proxy uses this proxy as
    # its source and +next_in_line+ as its destination.
    def to(next_in_line)
      self.class.new(self, next_in_line)
    end
  end
  ################################################################################
end
|
50
|
+
################################################################################
|
data/test/cache.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
require 'fileutils'
|
26
|
+
require 'test/lib/server'
|
27
|
+
require 'scrapes/cache'
|
28
|
+
require 'test/unit'
|
29
|
+
|
30
|
+
# Exercises Scrapes::Cache using documents served by the local test HTTP server.
class TestCache < Test::Unit::TestCase
  include LocalHTTPServer

  # Start the local server and build a fresh cache before each test.
  def setup
    start_server
    @cache = Scrapes::Cache.new
  end

  # Stop the server and remove the 'cache' directory if a test created it.
  def teardown
    stop_server
    FileUtils.remove_dir('cache') if File.exist?('cache')
  end

  # Sanity check that the fixtures were created.
  def test_truth
    assert @server
    assert @cache
  end

  # The directory attribute starts as the expanded 'cache' path and is writable.
  def test_directory_attr
    assert_equal File.expand_path('cache'), @cache.directory
    assert_nothing_raised { @cache.directory = 'cache' }
    assert_equal 'cache', @cache.directory
  end

  # The cache starts disabled and can be switched on.
  def test_enabled_attr
    assert_equal false, @cache.enabled
    assert_nothing_raised { @cache.enabled = true }
    assert @cache.enabled
  end

  # An update is only visible through check once the cache is enabled.
  def test_update
    assert_nothing_raised { @cache.update 'foo.txt', localhost_http_get('foo.txt') }
    assert(!@cache.check('foo.txt'))
    assert_nothing_raised { @cache.enabled = true }
    assert_nothing_raised { @cache.update 'foo.txt', localhost_http_get('foo.txt') }
    assert(@cache.check('foo.txt'))
  end

  # Inside without_cache, updates are not visible through check even though the
  # cache was enabled beforehand.
  def test_without_cache
    assert_nothing_raised { @cache.enabled = true }
    @cache.without_cache do
      assert_nothing_raised { @cache.update 'foo.txt', localhost_http_get('foo.txt') }
      assert(!@cache.check('foo.txt'))
    end
  end
end
|
data/test/cookies.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
require 'scrapes/cookies'
|
26
|
+
require 'test/unit'
|
27
|
+
|
28
|
+
# Checks the round trip from a Set-Cookie style header to a Cookie header.
class TestCookies < Test::Unit::TestCase
  # Only the name=value pair is expected to come back out of to_header.
  def test_parser
    jar = Scrapes::Cookies.new
    jar.from_header('sid=21;domain=.example.com;Path=/')
    assert_equal('sid=21', jar.to_header)
  end
end
|
data/test/crawler.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
require 'fileutils'
|
26
|
+
require 'test/lib/server'
|
27
|
+
require 'scrapes/crawler'
|
28
|
+
require 'scrapes/session'
|
29
|
+
require 'test/unit'
|
30
|
+
|
31
|
+
# Exercises Scrapes::Crawler against the local test HTTP server.
class TestCrawler < Test::Unit::TestCase
  include LocalHTTPServer

  # Build a session, start the local server and create a crawler for the session.
  def setup
    @session = Scrapes::Session.new
    start_server
    @crawler = Scrapes::Crawler.new @session
  end

  # Stop the server and remove the 'cache' directory if a test created it.
  def teardown
    stop_server
    FileUtils.remove_dir('cache') if File.exist?('cache')
  end

  # Sanity check that the fixtures were created.
  def test_truth
    assert @session
    assert @server
    assert @crawler
  end

  # A crawler starts with a cache, and the cache can be replaced.
  def test_cache_attr
    assert @crawler.cache
    cache = Scrapes::Cache.new
    assert_nothing_raised { @crawler.cache = cache }
    assert_equal cache, @crawler.cache
  end

  # The log attribute starts out nil and accepts any object.
  def test_log_attr
    assert_nil @crawler.log
    log = Object.new
    assert_nothing_raised { @crawler.log = log }
    assert_equal log, @crawler.log
  end

  # Fetching an existing document is truthy; a missing one gives a 404 response.
  def test_fetch
    assert @crawler.fetch(localhost_url('foo.txt'))
    assert_instance_of Net::HTTPNotFound, @crawler.fetch(localhost_url('dummy'))
  end
end
|
data/test/hpricot.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
################################################################################ #
|
2
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#
|
23
|
+
################################################################################
|
24
|
+
require 'rubygems'
|
25
|
+
################################################################################
|
26
|
+
require 'scrapes'
|
27
|
+
################################################################################
|
28
|
+
require 'test/lib/server'
|
29
|
+
require 'test/unit'
|
30
|
+
|
31
|
+
# Loads the redhanded fixture page through a Scrapes session.
#
# NOTE(review): test/page.rb defines a class with this same name. If both files
# are loaded into one process the two definitions merge into a single class;
# consider giving this one a distinct name.
class TestRedhandedPage < Test::Unit::TestCase
  include LocalHTTPServer

  # Start the local server and load the page classes found under 'test'.
  def setup
    start_server
    Scrapes::Initializer.run do |initializer|
      initializer.pages_parent = 'test'
      initializer.process
    end
  end

  # Stop the local server.
  def teardown
    stop_server
  end

  # Sanity check that the server fixture was created.
  def test_truth
    assert @server
  end

  # Fetch the fixture page; previously this test made no assertion at all, so
  # it could only fail by raising.
  def test_texts
    Scrapes::Session.start do |session|
      @page = session.page(LocalRedhanded, localhost_url('redhanded.html'))
    end
    assert_not_nil @page
  end
end
|
data/test/initializer.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
require 'rubygems'
|
26
|
+
require 'scrapes/initializer'
|
27
|
+
require 'scrapes/page'
|
28
|
+
require 'test/unit'
|
29
|
+
|
30
|
+
# Exercises the configuration attributes of Scrapes::Initializer.
class TestInitializer < Test::Unit::TestCase
  # Build a fresh initializer before each test.
  def setup
    assert_nothing_raised { @initializer = Scrapes::Initializer.new }
  end

  # Running with an empty block must not raise.
  def test_run
    assert_nothing_raised { Scrapes::Initializer.run { } }
  end

  # pages_parent starts as the directory of the running script and is writable.
  def test_pages_parent
    assert_equal File.dirname($0), @initializer.pages_parent
    assert_nothing_raised { @initializer.pages_parent = 'foobar' }
    assert_equal 'foobar', @initializer.pages_parent
  end

  # pages_dir starts as 'pages' and is writable.
  def test_pages_dir
    assert_equal 'pages', @initializer.pages_dir
    assert_nothing_raised { @initializer.pages_dir = 'foobar' }
    assert_equal 'foobar', @initializer.pages_dir
  end

  # Processing with the default settings must not raise.
  def test_process
    assert_nothing_raised { @initializer.process }
  end
end
|
data/test/lib/server.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
# TODO figure out how to suppress get output
|
26
|
+
################################################################################
|
27
|
+
require 'logger'
|
28
|
+
require 'webrick'
|
29
|
+
require 'net/http'
|
30
|
+
################################################################################
|
31
|
+
# webrick localhost http server
|
32
|
+
module LocalHTTPServer
  # Port used both by the server and by the URLs built for it.
  PORT = 4270

  ################################################################################
  # Start a WEBrick server for test/public on PORT in a background thread.
  # The server is stored in @server; the return value is the Thread running it.
  def start_server
    @server = WEBrick::HTTPServer.new(
      :Port => PORT,
      :Logger => Logger.new(nil),
      :DocumentRoot => File.expand_path('test/public'),
      :AccessLog => []
    )
    @server_thread = Thread.new { @server.start }
  end

  ################################################################################
  # Shut the server down, wait for its thread to finish and return the server.
  # Returns nil when no server was started.
  def stop_server
    return unless @server

    @server.shutdown
    @server_thread.join
    @server
  end

  ################################################################################
  # Return a localhost url given a doc path (the server root when path is nil).
  def localhost_url(path = nil)
    "http://localhost:#{PORT}/#{path}"
  end

  ################################################################################
  # Get a page body from the localhost http server.
  def localhost_http_get(path = nil)
    Net::HTTP.get(URI.parse(localhost_url(path)))
  end
end
|
63
|
+
################################################################################
|
data/test/page.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
################################################################################ #
|
2
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#
|
23
|
+
################################################################################
|
24
|
+
require 'rubygems'
|
25
|
+
################################################################################
|
26
|
+
require 'scrapes'
|
27
|
+
################################################################################
|
28
|
+
require 'test/lib/server'
|
29
|
+
require 'test/unit'
|
30
|
+
|
31
|
+
# Checks the values scraped from the local fixture pages.
class TestRedhandedPage < Test::Unit::TestCase
  include LocalHTTPServer

  # Bring up the local server, then load the page classes found under 'test'.
  def setup
    start_server
    Scrapes::Initializer.run do |config|
      config.pages_parent = 'test'
      config.process
    end
  end

  # Shut the local server down again.
  def teardown
    stop_server
  end

  # Sanity check that the server fixture was created.
  def test_truth
    assert @server
  end

  # Values extracted from redhanded.html by the LocalRedhanded page class.
  def test_local_redhanded
    Scrapes::Session.start { |session| @page = session.page(LocalRedhanded, localhost_url('redhanded.html')) }

    assert_equal Array, @page.syndicate_content.class
    assert_equal 2, @page.syndicate_content.size
    assert_equal '/index.xml', @page.syndicate_link
    assert_equal 'JavaScript', @page.script_language
    assert_equal 274, @page.links.size
    #assert_equal 0, @page.element.size
    assert_equal 'RSS', @page.syndicate_content[0]
    assert_equal '2.0', @page.syndicate_content[1]
  end

  # Number of entries LocalRedhandedEntries finds in redhanded.html.
  def test_local_redhanded_enties
    Scrapes::Session.start { |session| @entries = session.page(LocalRedhandedEntries, localhost_url('redhanded.html')) }

    assert_equal 20, @entries.size
    #assert_equal "Denver Accord#", @entries[0].entry_title
  end

  # Loads foil74.html through LocalPagination; no assertions are made here.
  def test_local_pagination_1
    Scrapes::Session.start { |session| @foil = session.page(LocalPagination, localhost_url('foil74.html')) }
  end
end
|