skyscraper 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +180 -0
- data/Rakefile +5 -0
- data/lib/skyscraper.rb +56 -0
- data/lib/skyscraper/base.rb +44 -0
- data/lib/skyscraper/config.rb +15 -0
- data/lib/skyscraper/document.rb +11 -0
- data/lib/skyscraper/field.rb +24 -0
- data/lib/skyscraper/node.rb +8 -0
- data/lib/skyscraper/node/base.rb +103 -0
- data/lib/skyscraper/node/resource.rb +57 -0
- data/lib/skyscraper/pages.rb +27 -0
- data/lib/skyscraper/path.rb +29 -0
- data/lib/skyscraper/path/base.rb +15 -0
- data/lib/skyscraper/path/local.rb +29 -0
- data/lib/skyscraper/path/remote.rb +32 -0
- data/lib/skyscraper/results.rb +93 -0
- data/lib/version.rb +3 -0
- data/skyscraper.gemspec +22 -0
- data/spec/skyscraper/skyscraper/base_spec.rb +83 -0
- data/spec/skyscraper/skyscraper/config_spec.rb +25 -0
- data/spec/skyscraper/skyscraper/document_spec.rb +14 -0
- data/spec/skyscraper/skyscraper/field_spec.rb +36 -0
- data/spec/skyscraper/skyscraper/node/base_spec.rb +87 -0
- data/spec/skyscraper/skyscraper/node/resource_spec.rb +58 -0
- data/spec/skyscraper/skyscraper/node_spec.rb +2 -0
- data/spec/skyscraper/skyscraper/pages_spec.rb +46 -0
- data/spec/skyscraper/skyscraper/path_spec.rb +110 -0
- data/spec/skyscraper/skyscraper/results_spec.rb +151 -0
- data/spec/skyscraper/skyscraper_spec.rb +39 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/support/skyscraper_helpers.rb +9 -0
- data/spec/test_files/encoding.html~ +12 -0
- data/spec/test_files/skyscraper-base.html +30 -0
- data/spec/test_files/skyscraper-document.html +30 -0
- data/spec/test_files/skyscraper-encoding.html +12 -0
- data/spec/test_files/skyscraper-fetch-2.html +11 -0
- data/spec/test_files/skyscraper-fetch.html +31 -0
- data/spec/test_files/skyscraper-field.html +30 -0
- data/spec/test_files/skyscraper-node-base-a.html +11 -0
- data/spec/test_files/skyscraper-node-base-b.html +10 -0
- data/spec/test_files/skyscraper-node-base-traversing.html +34 -0
- data/spec/test_files/skyscraper-node-base.html +30 -0
- data/spec/test_files/skyscraper-node-resource-b.html +10 -0
- data/spec/test_files/skyscraper-node-resource-image.png +0 -0
- data/spec/test_files/skyscraper-node-resource.html +12 -0
- data/spec/test_files/skyscraper-pages.html +30 -0
- data/spec/test_files/skyscraper.html +30 -0
- metadata +169 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
# Specs for Skyscraper::Field: every example builds a field, resolves it
# against the shared fixture page and checks the resulting value.
describe Skyscraper::Field do
  # Runs +field+ against the fixture page and hands back the value it resolved.
  def value_found_by(field)
    field.find_in_document(@page)
    field.value
  end

  # The fixture is fetched once and shared by all examples in this group.
  before(:all) do
    @page = Skyscraper.fetch(path_to("skyscraper-field.html"))
  end

  it "should find field value using css selector" do
    name_field = Skyscraper::Field.new(name: :name, selector: ".item strong.name")

    value_found_by(name_field).should == "Name value"
  end

  it "should apply callback" do
    # The callback receives the matched item; doubling its href makes the
    # callback's effect visible in the final value.
    doubling = proc do |item|
      item.href * 2
    end
    link_field = Skyscraper::Field.new(name: :name, selector: "a", callback: doubling)

    value_found_by(link_field).should == "a.htmla.html"
  end

  it "should read attributes from elements" do
    href_field = Skyscraper::Field.new(name: :name, selector: "a", attribute: :href)

    value_found_by(href_field).should == "a.html"
  end

  it "should returns text code of inner element by default" do
    text_field = Skyscraper::Field.new(name: :name, selector: ".item")

    value_found_by(text_field).should include("Name value")
  end

  it "should returns html code of inner element" do
    html_field = Skyscraper::Field.new(name: :name, selector: ".item", attribute: "html")

    value_found_by(html_field).should include("<strong class=\"name\">Name value</strong>")
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# Specs for Skyscraper::Node::Base: reading a node's html/class/text,
# following and downloading links, and traversing the document tree.
describe Skyscraper::Node::Base do
  describe "when is initialized" do
    before(:each) do
      @node = Skyscraper::fetch(path_to("skyscraper-node-base.html")).first("div.item")
    end

    it "should returns html code" do
      @node.html.should include "<strong class=\"name\">Name value</strong>"
    end

    it "should returns class name" do
      # Here #class is expected to yield the element's CSS class,
      # not the Ruby class of the node object.
      @node.class.should == "item"
    end

    it "should be auto converted to string with stripped tags" do
      @node.text.should == "Name value"
    end
  end

  it "should follow links" do
    Skyscraper::fetch(path_to("skyscraper-node-base.html")).first("li a").follow.first("h1").text.should == "Hello from A"
  end

  it "should deep follow links" do
    Skyscraper::fetch(path_to("skyscraper-node-base.html")).first("li a").follow.first("a").follow.first("h1").text.should == "Hello from B"
  end

  it "should download page" do
    remove_test_directory
    # NOTE(review): this assigns a global setting that is not reset afterwards;
    # confirm no later spec depends on the previous download_path.
    Skyscraper.config.download_path = "/tmp/skyscraper_test/nodes/:file_name"
    file = Skyscraper::fetch(path_to("skyscraper-node-base.html")).first("li a").follow.first("a").download
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    File.exist?(file).should == true
  end

  describe "traversing" do
    before(:each) do
      @node = Skyscraper::fetch(path_to("skyscraper-node-base-traversing.html")).first(".menu")
    end

    it "should find descendands items" do
      result = @node.find("li")
      result.length.should == 5
      result.map(&:text).should include "Item 4 1"
    end

    it "should returns children of element with selector" do
      node = Skyscraper::fetch(path_to("skyscraper-node-base-traversing.html")).first("#parent-3")
      node.children(".a").length.should == 4
      node.children(".b").length.should == 2
    end

    it "should returns children of element without selector" do
      result = @node.children
      result.length.should == 4
      result.map(&:to_s).should_not include "Item 4 1"
    end

    it "should returns first element" do
      @node.first("li").class.should == "item-1"
    end

    it "should returns parent of item" do
      @node.parent.class.should == "parent-2"
    end

    it "should tells if element have parent" do
      @node.have_parent?.should == true
      @node.parents("html").first.have_parent?.should == false
    end

    it "should returns parents of item" do
      @node.parents.length.should == 4
    end

    it "should returns parents of item matched by selector" do
      @node.parents("div").length.should == 2
    end

    it "should returns siblings of item" do
      @node.first(".item-3").siblings.length.should == 3
    end

    it "should returns node tag" do
      @node.tag.should == "ul"
    end
  end
end
|
87
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Specs for Skyscraper::Node::Resource: downloading the resource a node
# points at, and how the target path is built from the :sequence and
# :file_name placeholders.
describe Skyscraper::Node::Resource do
  # Downloads +node+ with +options+ and expects the download to report +path+.
  def should_download_resource_to(node, path, options = {})
    resource = Skyscraper::Node::Resource.new(node)
    resource.download(options).should == path
  end

  before(:all) do
    # NOTE(review): this assigns a global setting that is not reset afterwards;
    # confirm no later spec depends on the previous download_path.
    Skyscraper.config.download_path = "/tmp/skyscraper_test/:sequence/:file_name"
    @node = Skyscraper::fetch(path_to("skyscraper-node-resource.html")).first("a")
  end

  # Every example starts from a clean download directory, so the expected
  # sequence directories below begin at 1.
  before(:each) do
    remove_test_directory
  end

  it "should create path if not exists when downloaded" do
    File.directory?("/tmp/skyscraper_test/1").should == false
    # Building the resource alone must not touch the file system.
    Skyscraper::Node::Resource.new(@node)
    File.directory?("/tmp/skyscraper_test/1").should == false
    Skyscraper::Node::Resource.new(@node).download
    File.directory?("/tmp/skyscraper_test/1").should == true
  end

  it "should not fail if path already exists" do
    Skyscraper::Node::Resource.new(@node).download path: "/tmp/skyscraper_test/some_directory/:file_name"
    File.directory?("/tmp/skyscraper_test/some_directory").should == true
    Skyscraper::Node::Resource.new(@node).download
    File.directory?("/tmp/skyscraper_test/some_directory").should == true
  end

  it "should have file name" do
    resource = Skyscraper::Node::Resource.new(@node)
    resource.download.should == "/tmp/skyscraper_test/1/skyscraper-node-resource-b.html"
  end

  it "should create path with :sequence variable" do
    download_to = "/tmp/skyscraper_test/sequences/:sequence/:file_name"
    should_download_resource_to @node, "/tmp/skyscraper_test/sequences/1/skyscraper-node-resource-b.html", path: download_to
    should_download_resource_to @node, "/tmp/skyscraper_test/sequences/2/skyscraper-node-resource-b.html", path: download_to
    should_download_resource_to @node, "/tmp/skyscraper_test/sequences/3/skyscraper-node-resource-b.html", path: download_to
  end

  it "should create custom file name if provided" do
    download_to = "/tmp/skyscraper_test/custom_name/:file_name"
    should_download_resource_to @node, "/tmp/skyscraper_test/custom_name/test.html", path: download_to, file_name: "test.html"
  end

  it "should download resource" do
    Skyscraper::Node::Resource.new(@node).download
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    File.exist?("/tmp/skyscraper_test/1/skyscraper-node-resource-b.html").should == true
  end

  it "should download image" do
    image_node = Skyscraper::fetch(path_to("skyscraper-node-resource.html")).first("img")
    Skyscraper::Node::Resource.new(image_node).download
    File.exist?("/tmp/skyscraper_test/1/skyscraper-node-resource-image.png").should == true
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Specs for Skyscraper::Pages: normalising the constructor input (string,
# array, nested array or block) into #items, and iterating with #next/#reset.
describe Skyscraper::Pages do
  it "should set convert string to items array" do
    pages = Skyscraper::Pages.new("http://google.com")

    pages.items.should == ["http://google.com"]
  end

  it "should set items array from array" do
    pages = Skyscraper::Pages.new(["http://google.com"])

    pages.items.should == ["http://google.com"]
  end

  it "should flat pages from nested arrays" do
    nested = ["http://google.com", ["http://yahoo.com"]]

    Skyscraper::Pages.new(nested).items.should == ["http://google.com", "http://yahoo.com"]
  end

  it "should set array from block" do
    pages = Skyscraper::Pages.new do
      (0...2).map { |i| "http://google.com/#{i}.html" }
    end

    pages.items.should == ["http://google.com/0.html", "http://google.com/1.html"]
  end

  it "should pass scraper instance to block" do
    # The block argument is expected to respond to #fetch.
    pages = Skyscraper::Pages.new do |scraper|
      scraper.fetch(path_to("skyscraper-pages.html")).first("a").href
    end

    pages.items.should == ["a.html"]
  end

  it "should works when block is passed without arguments" do
    pages = Skyscraper::Pages.new do
      "a.html"
    end

    pages.items.should == ["a.html"]
  end

  it "should return next item" do
    pages = Skyscraper::Pages.new(["a", "b", "c"])

    # Each call to #next must yield the following item, in order.
    ["a", "b", "c"].each do |expected|
      pages.next.should == expected
    end
  end

  it "should reset pages" do
    pages = Skyscraper::Pages.new(["a", "b", "c"])

    ["a", "b"].each do |expected|
      pages.next.should == expected
    end

    pages.reset
    pages.next.should == "a"
  end
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for Skyscraper::Path: the factory's handling of remote and local
# locations, plus the remote?/absolute? class-level predicates.
describe Skyscraper::Path do
  # Shorthand for building a path object through the factory.
  def path_for(location)
    Skyscraper::Path.factory(location)
  end

  describe "when path is REMOTE" do
    before(:each) do
      @path = path_for("http://google.com/index.php?q=e")
    end

    it "should returns domain" do
      @path.domain.should == "http://google.com"
    end

    it "should returns domain with no scheme" do
      # Despite the example name, the assertion below is on #full_path.
      @path = path_for("google.com/index.php?q=e")
      @path.full_path.should == "google.com/index.php?q=e"
    end

    it "should returns path" do
      @path.path.should == "/index.php"
    end

    it "should returns query" do
      @path.query.should == "?q=e"
    end

    it "should returns base" do
      @path.base.should == "http://google.com/"
    end

    it "should returns full path" do
      @path.full_path.should == "http://google.com/index.php?q=e"
    end

    it "should be converted to string" do
      @path.to_s.should == @path.full_path
    end

    it "should returns full path for full different" do
      origin = path_for("http://google.com/a/index.php")

      origin.full_path_for("http://yahoo.com/b.html").should == "http://yahoo.com/b.html"
    end

    it "should returns full path for relative" do
      origin = path_for("http://google.com/a/index.php")

      origin.full_path_for("b.html").should == "http://google.com/a/b.html"
    end

    it "should returns full path for absolute" do
      origin = path_for("http://google.com/a/index.php")

      origin.full_path_for("/b.html").should == "http://google.com/b.html"
    end

    it "should returns file name" do
      origin = path_for("http://google.com/a/index.php")

      origin.file_name.should == "index.php"
    end
  end

  describe "when path is LOCAL" do
    before(:each) do
      @path = path_for("/var/www/files/file.ext")
    end

    it "should returns folder" do
      @path.folder.should == "/var/www/files/"
    end

    it "should returns file name" do
      @path.file_name.should == "file.ext"
    end

    it "should returns full path" do
      @path.full_path.should == "/var/www/files/file.ext"
    end

    it "should returns base" do
      @path.base.should == "/var/www/files/"
    end

    it "should returns full path for relative" do
      origin = path_for("/var/www/public/index.html")

      # "../" segments are expected to be kept verbatim, not collapsed.
      origin.full_path_for("../b.html").should == "/var/www/public/../b.html"
      origin.full_path_for("b.html").should == "/var/www/public/b.html"
    end

    it "should returns full path for absolute full" do
      origin = path_for("/var/www/public/index.html")

      origin.full_path_for("/var/www/test.html").should == "/var/www/test.html"
    end
  end

  it "should detect if string is remote " do
    ["http://google.com", "google.com"].each do |location|
      Skyscraper::Path.remote?(location).should == true
    end
  end

  it "should detect if string is not remote " do
    ["/var/www/projects", "/var/www/projects/file.ext"].each do |location|
      Skyscraper::Path.remote?(location).should == false
    end
  end

  it "should check if is absolute address" do
    Skyscraper::Path.absolute?("/some/relative/path").should == true
  end

  it "should returns nil for wrong path" do
    # A location ending in "/" has no file component.
    directory = path_for("/var/www/files/")

    directory.file_name.should == nil
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# Specs for Skyscraper::Results: fetching records for a configured base,
# the delay/limit options, continuing after a limit, the after_each and
# after_all callbacks, and error handling.
describe Skyscraper::Results do
  # Builds a Skyscraper::Base from +options+ and returns the fetched results.
  #
  # Recognised keys:
  #   :path    - page or array of pages handed to Base#pages
  #   :fields  - hash of field name => selector, registered via Base#field
  #   :options - hash passed through to Skyscraper::Results.new
  #
  # Defaults are merged into a new hash, so the caller's hash is not mutated
  # and no ActiveSupport core extension is needed.
  def fetch(options = {})
    options = { fields: {}, options: {} }.merge(options)
    base = Skyscraper::Base.new
    base.pages options[:path]

    options[:fields].each_pair do |key, value|
      base.field key, value
    end

    Skyscraper::Results.new(base, options[:options]).fetch
  end

  it "should fetch file content" do
    results = fetch path: path_to("skyscraper-fetch.html"), fields: { h1: "h1" }
    results[0][:h1].should == "Hello world"
  end

  it "should fetch with delay" do
    time = Time.now
    fetch path: [path_to("skyscraper-fetch.html")] * 2, fields: { title: "title" }, options: { delay: 1 }
    time_diff = Time.now - time
    time_diff.should > 1
  end

  it "should fetch with delay after" do
    # 10 pages with after: 7 is expected to take between 1 and 3 seconds.
    time = Time.now
    fetch path: [path_to("skyscraper-fetch.html")] * 10, options: { delay: { sleep: 1, after: 7 } }
    time_diff = Time.now - time
    time_diff.should > 1
    time_diff.should < 3

    # With after: 11 the same 10 pages are expected to finish in under a second.
    time = Time.now
    fetch path: [path_to("skyscraper-fetch.html")] * 10, options: { delay: { sleep: 1, after: 11 } }
    time_diff = Time.now - time
    time_diff.should < 1
  end

  it "should fetch with results limit" do
    results = fetch path: [path_to("skyscraper-fetch.html")] * 11, options: { limit: 10 }
    results.length.should == 10
  end

  it "should apply config defaults" do
    base = Skyscraper::Base.new
    base.config.limit = 2
    base.pages [path_to("skyscraper-fetch.html")] * 10

    results = Skyscraper::Results.new(base).fetch
    results.length.should == 2
  end

  it "should continue after limit reached" do
    base = Skyscraper::Base.new
    base.config.limit = 1
    base.pages [path_to("skyscraper-fetch.html"), path_to("skyscraper-fetch-2.html")]
    base.field :h1, "h1"

    results = Skyscraper::Results.new(base)
    results.fetch.length.should == 1
    results.records.length.should == 1
    # #continue returns only the newly fetched batch, while #records accumulates.
    results.continue.length.should == 1
    results.records.length.should == 2
    results.records[1][:h1].should == "Hello from A"
  end

  describe "callbacks" do
    it "should calls after each page callback" do
      @call_count = 0
      callback = proc do |result, page|
        result.should be_an_instance_of(Hash)
        page.should be_an_instance_of(Skyscraper::Node::Base)
        @call_count += 1
      end

      fetch path: [path_to("skyscraper-fetch.html")] * 10, options: { after_each: [callback] }

      @call_count.should == 10
    end

    it "should calls after all callback" do
      @call_count = 0
      callback = proc do |results|
        results.should be_an_instance_of(Array)
        @call_count += 1
      end

      fetch path: [path_to("skyscraper-fetch.html")] * 10, options: { after_all: [callback] }

      @call_count.should == 1
    end

    it "should change result value for each" do
      callback = proc do |result, page|
        result[:h1] += " with callback"
      end

      results = fetch path: [path_to("skyscraper-fetch.html")] * 10, fields: { h1: "h1" }, options: { after_each: [callback] }
      results[0][:h1].should == "Hello world with callback"
    end

    it "should change results values for all" do
      callback = proc do |results|
        results << "test"
      end

      results = fetch path: [path_to("skyscraper-fetch.html")] * 10, fields: { h1: "h1" }, options: { after_all: [callback] }
      results.last.should == "test"
    end

    it "should doesn't requires callback arguments" do
      # The callback's return value is expected to be ignored: the record
      # keeps its original value.
      callback = proc do
        "with callback"
      end

      results = fetch path: [path_to("skyscraper-fetch.html")] * 10, fields: { h1: "h1" }, options: { after_each: [callback] }
      results[0][:h1].should == "Hello world"
    end

  end

  describe "errors" do
    # NOTE(review): noise_errors and skip_on_error are set on the global
    # Skyscraper.config and are not restored after this group; confirm no
    # later spec depends on their previous values.
    before(:all) do
      Skyscraper.config.noise_errors = false
    end

    it "should catch invalid url exception" do
      Skyscraper.config.skip_on_error = false
      lambda do
        fetch path: "http://google.wrong"
      end.should raise_error Skyscraper::NoResourceException
    end

    it "should catch file not exists exception" do
      Skyscraper.config.skip_on_error = false
      lambda do
        fetch path: "/tmp/skyscraper/unknow_file"
      end.should raise_error Skyscraper::NoResourceException
    end

    it "should skip on error" do
      Skyscraper.config.skip_on_error = true
      # Asserted with the matcher rather than a rescue flag, so the example
      # no longer relies on an unassigned local defaulting to nil.
      lambda do
        fetch path: "http://google.wrong"
      end.should_not raise_error
    end
  end
end
|