manga-crawler 0.1.2 → 0.2.0

@@ -36,7 +36,7 @@ module MangaCrawler
   def get_chapters manga_website
   #TODO
   #uses the same logic of get_mangas
- return get_mangas manga_website
+ return self.get_mangas manga_website
   end
 
   def get_pages chapter_website, css_image_path
@@ -53,12 +53,12 @@ module MangaCrawler
 
   params = Website::Parameters.new(chapter_website.params.base_url, current_url, css_image_path, :src)
 
- result.push( get_image_from_page Website::Page.new(params) )
+ result.push( self.get_image_from_page Website::Page.new(params) )
   end
 
   end_time = Time.now
 
- puts "\mCollect pages completed!"
+ puts "\nCollect pages completed!"
   puts "Elapsed time: #{end_time-start_time} seconds."
 
   return result
@@ -82,9 +82,13 @@ module MangaCrawler
 
   def get_image_from_page image_website
 
- html_image = Nokogiri::HTML(open(image_website.params.current_url))
+ begin
+ html_image = Nokogiri::HTML(open(image_website.params.current_url))
 
- image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
+ image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
+ rescue Exception => e
+ p "Error trying to access: #{image_website.params.current_url}"
+ end
 
   return image_link
   end
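For reference, the rescued method assembled from the hunk above reads roughly as follows (a sketch only; `open` is assumed to come from Ruby's open-uri, which the crawler presumably requires elsewhere). Because `image_link` is only assigned inside the begin block, the method returns nil when a page cannot be fetched, which is the behaviour the new "must continue if some link is broken" spec relies on.

```ruby
require 'nokogiri'
require 'open-uri'  # assumption: the source of the bare `open` call

# Sketch of get_image_from_page after this change: the fetch and the CSS
# lookup are wrapped in begin/rescue, so one unreachable page no longer
# aborts the whole crawl; the method returns nil for that page instead.
def get_image_from_page image_website
  begin
    html_image = Nokogiri::HTML(open(image_website.params.current_url))

    image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
  rescue Exception => e
    p "Error trying to access: #{image_website.params.current_url}"
  end

  return image_link
end
```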
@@ -1,3 +1,3 @@
  module MangaCrawler
- VERSION = "0.1.2"
+ VERSION = "0.2.0"
  end
@@ -1,6 +1,6 @@
  <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
- <option selected value="1.html">1</option>
- <option value="2.html">2</option></select> of <strong>2</strong>
+ <option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
+ <option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
 
  <div id="imgholder">
  <a href="2.html">
@@ -1,6 +1,6 @@
  <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
- <option selected value="1.html">1</option>
- <option value="2.html">2</option></select> of <strong>2</strong>
+ <option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
+ <option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
 
  <div id="imgholder">
  <a href="../2/1.html">
@@ -0,0 +1,9 @@
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
+ <option selected value="test/fixtures/Bleach/chapters/1/broken.html">1</option>
+ <option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
+
+ <div id="imgholder">
+ <a href="2.html">
+ <img id="img" src="mushroom_risotto.jpg" alt="An image" name="img"/>
+ </a>
+ </div>
@@ -67,13 +67,38 @@ describe MangaCrawler::Crawler do
   end
 
   it "must collect all pages from a given chapter" do
- link = "https://starkana.me/manga/0/A_Princess_and_a_Bum_(Manhwa)/chapter/7"
+
+ link = "test/fixtures/Bleach/chapters/1/1.html"
+ sample_chapter_page = File.open(link)
+
+ base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/1.html/,"")
+
   css_pages_path = "#page_switch option"
   pages_html_field = :value
 
- params = Website::Parameters.new("https://starkana.me", link, css_pages_path, pages_html_field)
+ params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
   chapter_page = Website::Page.new(params)
 
- crawler.get_pages chapter_page, "#pic img"
+ pages = crawler.get_pages chapter_page, "#img"
+
+ pages.must_equal ["mushroom_risotto.jpg", "vegetable_curry.jpg"]
+ end
+
+ it "must continue if some link is broken" do
+
+ link = "test/fixtures/Bleach/chapters/1/broken-1.html"
+ sample_chapter_page = File.open(link)
+
+ base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/broken-1.html/,"")
+
+ css_pages_path = "#page_switch option"
+ pages_html_field = :value
+
+ params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
+ chapter_page = Website::Page.new(params)
+
+ pages = crawler.get_pages chapter_page, "#img"
+
+ pages.must_equal [nil, "vegetable_curry.jpg"]
   end
 end
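Since get_pages can now return nil for pages it could not fetch (as the new spec asserts), a caller that only wants usable image links has to filter those entries out. A minimal, hypothetical caller-side sketch, not part of the gem:

```ruby
# Hypothetical consumer code: drop the nil entries that get_pages now
# emits for pages it failed to fetch, keeping only resolvable image links.
pages  = crawler.get_pages chapter_page, "#img"
usable = pages.compact   # e.g. [nil, "vegetable_curry.jpg"] -> ["vegetable_curry.jpg"]

puts "Skipped #{pages.size - usable.size} broken page(s)."
```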
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: manga-crawler
  version: !ruby/object:Gem::Version
- version: 0.1.2
+ version: 0.2.0
  prerelease:
  platform: ruby
  authors:
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-04-19 00:00:00.000000000 Z
+ date: 2013-04-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler
@@ -80,6 +80,7 @@ files:
  - test/fixtures/Bleach/bleach.html
  - test/fixtures/Bleach/chapters/1/1.html
  - test/fixtures/Bleach/chapters/1/2.html
+ - test/fixtures/Bleach/chapters/1/broken-1.html
  - test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
  - test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
  - test/fixtures/Bleach/chapters/2/1.html
@@ -154,6 +155,7 @@ test_files:
  - test/fixtures/Bleach/bleach.html
  - test/fixtures/Bleach/chapters/1/1.html
  - test/fixtures/Bleach/chapters/1/2.html
+ - test/fixtures/Bleach/chapters/1/broken-1.html
  - test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
  - test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
  - test/fixtures/Bleach/chapters/2/1.html