manga-crawler 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/manga-crawler/crawler.rb +9 -5
- data/lib/manga-crawler/version.rb +1 -1
- data/test/fixtures/Bleach/chapters/1/1.html +2 -2
- data/test/fixtures/Bleach/chapters/1/2.html +2 -2
- data/test/fixtures/Bleach/chapters/1/broken-1.html +9 -0
- data/test/lib/manga-crawler/crawler_test.rb +28 -3
- metadata +4 -2
@@ -36,7 +36,7 @@ module MangaCrawler
|
|
36
36
|
def get_chapters manga_website
|
37
37
|
#TODO
|
38
38
|
#uses the same logic of get_mangas
|
39
|
-
return get_mangas manga_website
|
39
|
+
return self.get_mangas manga_website
|
40
40
|
end
|
41
41
|
|
42
42
|
def get_pages chapter_website, css_image_path
|
@@ -53,12 +53,12 @@ module MangaCrawler
|
|
53
53
|
|
54
54
|
params = Website::Parameters.new(chapter_website.params.base_url, current_url, css_image_path, :src)
|
55
55
|
|
56
|
-
result.push( get_image_from_page Website::Page.new(params) )
|
56
|
+
result.push( self.get_image_from_page Website::Page.new(params) )
|
57
57
|
end
|
58
58
|
|
59
59
|
end_time = Time.now
|
60
60
|
|
61
|
-
puts "\
|
61
|
+
puts "\nCollect pages completed!"
|
62
62
|
puts "Elapsed time: #{end_time-start_time} seconds."
|
63
63
|
|
64
64
|
return result
|
@@ -82,9 +82,13 @@ module MangaCrawler
|
|
82
82
|
|
83
83
|
def get_image_from_page image_website
|
84
84
|
|
85
|
-
|
85
|
+
begin
|
86
|
+
html_image = Nokogiri::HTML(open(image_website.params.current_url))
|
86
87
|
|
87
|
-
|
88
|
+
image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
|
89
|
+
rescue Exception => e
|
90
|
+
p "Error trying to access: #{image_website.params.current_url}"
|
91
|
+
end
|
88
92
|
|
89
93
|
return image_link
|
90
94
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
-
<option selected value="1.html">1</option>
|
3
|
-
<option value="2.html">2</option></select> of <strong>2</strong>
|
2
|
+
<option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
|
3
|
+
<option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
|
4
4
|
|
5
5
|
<div id="imgholder">
|
6
6
|
<a href="2.html">
|
@@ -1,6 +1,6 @@
|
|
1
1
|
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
-
<option selected value="1.html">1</option>
|
3
|
-
<option value="2.html">2</option></select> of <strong>2</strong>
|
2
|
+
<option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
|
3
|
+
<option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
|
4
4
|
|
5
5
|
<div id="imgholder">
|
6
6
|
<a href="../2/1.html">
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="test/fixtures/Bleach/chapters/1/broken.html">1</option>
|
3
|
+
<option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="2.html">
|
7
|
+
<img id="img" src="mushroom_risotto.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -67,13 +67,38 @@ describe MangaCrawler::Crawler do
|
|
67
67
|
end
|
68
68
|
|
69
69
|
it "must collect all pages from a given chapter" do
|
70
|
-
|
70
|
+
|
71
|
+
link = "test/fixtures/Bleach/chapters/1/1.html"
|
72
|
+
sample_chapter_page = File.open(link)
|
73
|
+
|
74
|
+
base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/1.html/,"")
|
75
|
+
|
71
76
|
css_pages_path = "#page_switch option"
|
72
77
|
pages_html_field = :value
|
73
78
|
|
74
|
-
params = Website::Parameters.new(
|
79
|
+
params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
|
75
80
|
chapter_page = Website::Page.new(params)
|
76
81
|
|
77
|
-
crawler.get_pages chapter_page, "#
|
82
|
+
pages = crawler.get_pages chapter_page, "#img"
|
83
|
+
|
84
|
+
pages.must_equal ["mushroom_risotto.jpg", "vegetable_curry.jpg"]
|
85
|
+
end
|
86
|
+
|
87
|
+
it "must continue if some link is broken" do
|
88
|
+
|
89
|
+
link = "test/fixtures/Bleach/chapters/1/broken-1.html"
|
90
|
+
sample_chapter_page = File.open(link)
|
91
|
+
|
92
|
+
base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/broken-1.html/,"")
|
93
|
+
|
94
|
+
css_pages_path = "#page_switch option"
|
95
|
+
pages_html_field = :value
|
96
|
+
|
97
|
+
params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
|
98
|
+
chapter_page = Website::Page.new(params)
|
99
|
+
|
100
|
+
pages = crawler.get_pages chapter_page, "#img"
|
101
|
+
|
102
|
+
pages.must_equal [nil, "vegetable_curry.jpg"]
|
78
103
|
end
|
79
104
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: manga-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -80,6 +80,7 @@ files:
|
|
80
80
|
- test/fixtures/Bleach/bleach.html
|
81
81
|
- test/fixtures/Bleach/chapters/1/1.html
|
82
82
|
- test/fixtures/Bleach/chapters/1/2.html
|
83
|
+
- test/fixtures/Bleach/chapters/1/broken-1.html
|
83
84
|
- test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
|
84
85
|
- test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
|
85
86
|
- test/fixtures/Bleach/chapters/2/1.html
|
@@ -154,6 +155,7 @@ test_files:
|
|
154
155
|
- test/fixtures/Bleach/bleach.html
|
155
156
|
- test/fixtures/Bleach/chapters/1/1.html
|
156
157
|
- test/fixtures/Bleach/chapters/1/2.html
|
158
|
+
- test/fixtures/Bleach/chapters/1/broken-1.html
|
157
159
|
- test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
|
158
160
|
- test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
|
159
161
|
- test/fixtures/Bleach/chapters/2/1.html
|