manga-crawler 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/manga-crawler/crawler.rb +9 -5
- data/lib/manga-crawler/version.rb +1 -1
- data/test/fixtures/Bleach/chapters/1/1.html +2 -2
- data/test/fixtures/Bleach/chapters/1/2.html +2 -2
- data/test/fixtures/Bleach/chapters/1/broken-1.html +9 -0
- data/test/lib/manga-crawler/crawler_test.rb +28 -3
- metadata +4 -2
|
@@ -36,7 +36,7 @@ module MangaCrawler
|
|
|
36
36
|
def get_chapters manga_website
|
|
37
37
|
#TODO
|
|
38
38
|
#uses the same logic of get_mangas
|
|
39
|
-
return get_mangas manga_website
|
|
39
|
+
return self.get_mangas manga_website
|
|
40
40
|
end
|
|
41
41
|
|
|
42
42
|
def get_pages chapter_website, css_image_path
|
|
@@ -53,12 +53,12 @@ module MangaCrawler
|
|
|
53
53
|
|
|
54
54
|
params = Website::Parameters.new(chapter_website.params.base_url, current_url, css_image_path, :src)
|
|
55
55
|
|
|
56
|
-
result.push( get_image_from_page Website::Page.new(params) )
|
|
56
|
+
result.push( self.get_image_from_page Website::Page.new(params) )
|
|
57
57
|
end
|
|
58
58
|
|
|
59
59
|
end_time = Time.now
|
|
60
60
|
|
|
61
|
-
puts "\
|
|
61
|
+
puts "\nCollect pages completed!"
|
|
62
62
|
puts "Elapsed time: #{end_time-start_time} seconds."
|
|
63
63
|
|
|
64
64
|
return result
|
|
@@ -82,9 +82,13 @@ module MangaCrawler
|
|
|
82
82
|
|
|
83
83
|
def get_image_from_page image_website
|
|
84
84
|
|
|
85
|
-
|
|
85
|
+
begin
|
|
86
|
+
html_image = Nokogiri::HTML(open(image_website.params.current_url))
|
|
86
87
|
|
|
87
|
-
|
|
88
|
+
image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
|
|
89
|
+
rescue Exception => e
|
|
90
|
+
p "Error trying to access: #{image_website.params.current_url}"
|
|
91
|
+
end
|
|
88
92
|
|
|
89
93
|
return image_link
|
|
90
94
|
end
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
|
2
|
-
<option selected value="1.html">1</option>
|
|
3
|
-
<option value="2.html">2</option></select> of <strong>2</strong>
|
|
2
|
+
<option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
|
|
3
|
+
<option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
|
|
4
4
|
|
|
5
5
|
<div id="imgholder">
|
|
6
6
|
<a href="2.html">
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
|
2
|
-
<option selected value="1.html">1</option>
|
|
3
|
-
<option value="2.html">2</option></select> of <strong>2</strong>
|
|
2
|
+
<option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
|
|
3
|
+
<option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
|
|
4
4
|
|
|
5
5
|
<div id="imgholder">
|
|
6
6
|
<a href="../2/1.html">
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
|
2
|
+
<option selected value="test/fixtures/Bleach/chapters/1/broken.html">1</option>
|
|
3
|
+
<option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
|
|
4
|
+
|
|
5
|
+
<div id="imgholder">
|
|
6
|
+
<a href="2.html">
|
|
7
|
+
<img id="img" src="mushroom_risotto.jpg" alt="An image" name="img"/>
|
|
8
|
+
</a>
|
|
9
|
+
</div>
|
|
@@ -67,13 +67,38 @@ describe MangaCrawler::Crawler do
|
|
|
67
67
|
end
|
|
68
68
|
|
|
69
69
|
it "must collect all pages from a given chapter" do
|
|
70
|
-
|
|
70
|
+
|
|
71
|
+
link = "test/fixtures/Bleach/chapters/1/1.html"
|
|
72
|
+
sample_chapter_page = File.open(link)
|
|
73
|
+
|
|
74
|
+
base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/1.html/,"")
|
|
75
|
+
|
|
71
76
|
css_pages_path = "#page_switch option"
|
|
72
77
|
pages_html_field = :value
|
|
73
78
|
|
|
74
|
-
params = Website::Parameters.new(
|
|
79
|
+
params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
|
|
75
80
|
chapter_page = Website::Page.new(params)
|
|
76
81
|
|
|
77
|
-
crawler.get_pages chapter_page, "#
|
|
82
|
+
pages = crawler.get_pages chapter_page, "#img"
|
|
83
|
+
|
|
84
|
+
pages.must_equal ["mushroom_risotto.jpg", "vegetable_curry.jpg"]
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it "must continue if some link is broken" do
|
|
88
|
+
|
|
89
|
+
link = "test/fixtures/Bleach/chapters/1/broken-1.html"
|
|
90
|
+
sample_chapter_page = File.open(link)
|
|
91
|
+
|
|
92
|
+
base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/broken-1.html/,"")
|
|
93
|
+
|
|
94
|
+
css_pages_path = "#page_switch option"
|
|
95
|
+
pages_html_field = :value
|
|
96
|
+
|
|
97
|
+
params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
|
|
98
|
+
chapter_page = Website::Page.new(params)
|
|
99
|
+
|
|
100
|
+
pages = crawler.get_pages chapter_page, "#img"
|
|
101
|
+
|
|
102
|
+
pages.must_equal [nil, "vegetable_curry.jpg"]
|
|
78
103
|
end
|
|
79
104
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: manga-crawler
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2013-04-
|
|
12
|
+
date: 2013-04-22 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: bundler
|
|
@@ -80,6 +80,7 @@ files:
|
|
|
80
80
|
- test/fixtures/Bleach/bleach.html
|
|
81
81
|
- test/fixtures/Bleach/chapters/1/1.html
|
|
82
82
|
- test/fixtures/Bleach/chapters/1/2.html
|
|
83
|
+
- test/fixtures/Bleach/chapters/1/broken-1.html
|
|
83
84
|
- test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
|
|
84
85
|
- test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
|
|
85
86
|
- test/fixtures/Bleach/chapters/2/1.html
|
|
@@ -154,6 +155,7 @@ test_files:
|
|
|
154
155
|
- test/fixtures/Bleach/bleach.html
|
|
155
156
|
- test/fixtures/Bleach/chapters/1/1.html
|
|
156
157
|
- test/fixtures/Bleach/chapters/1/2.html
|
|
158
|
+
- test/fixtures/Bleach/chapters/1/broken-1.html
|
|
157
159
|
- test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
|
|
158
160
|
- test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
|
|
159
161
|
- test/fixtures/Bleach/chapters/2/1.html
|