RubyGems - manga-crawler - Versions diffs - 0.1.2 → 0.2.0 - Mend

manga-crawler 0.1.2 → 0.2.0

Files changed (7)

@@ -36,7 +36,7 @@ module MangaCrawler
     def get_chapters manga_website
       #TODO
       #uses the same logic of get_mangas
-      return get_mangas manga_website
+      return self.get_mangas manga_website
     end
     def get_pages chapter_website, css_image_path
@@ -53,12 +53,12 @@ module MangaCrawler
         params = Website::Parameters.new(chapter_website.params.base_url, current_url, css_image_path, :src)
-        result.push( get_image_from_page Website::Page.new(params) )
+        result.push( self.get_image_from_page Website::Page.new(params) )
       end
       end_time = Time.now
-      puts "\mCollect pages completed!"
+      puts "\nCollect pages completed!"
       puts "Elapsed time: #{end_time-start_time} seconds."
       return result
@@ -82,9 +82,13 @@ module MangaCrawler
     def get_image_from_page image_website
-      html_image = Nokogiri::HTML(open(image_website.params.current_url))
+      begin
+        html_image = Nokogiri::HTML(open(image_website.params.current_url))
-      image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
+        image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
+      rescue Exception => e
+        p "Error trying to access: #{image_website.params.current_url}"
+      end
       return image_link
     end

@@ -1,3 +1,3 @@
 module MangaCrawler
-  VERSION = "0.1.2"
+  VERSION = "0.2.0"
 end

@@ -1,6 +1,6 @@
 <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
-  <option selected value="1.html">1</option>
-  <option  value="2.html">2</option></select> of <strong>2</strong>
+  <option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
+  <option  value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
 <div id="imgholder">
   <a href="2.html">

@@ -1,6 +1,6 @@
 <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
-  <option selected value="1.html">1</option>
-  <option  value="2.html">2</option></select> of <strong>2</strong>
+  <option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
+  <option  value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
 <div id="imgholder">
   <a href="../2/1.html">

@@ -0,0 +1,9 @@
+<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
+  <option selected value="test/fixtures/Bleach/chapters/1/broken.html">1</option>
+  <option  value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
+<div id="imgholder">
+  <a href="2.html">
+    <img id="img" src="mushroom_risotto.jpg" alt="An image" name="img"/>
+  </a>
+</div>

@@ -67,13 +67,38 @@ describe MangaCrawler::Crawler do
   end
   it "must collect all pages from a given chapter" do
-    link = "https://starkana.me/manga/0/A_Princess_and_a_Bum_(Manhwa)/chapter/7"
+    link = "test/fixtures/Bleach/chapters/1/1.html"
+    sample_chapter_page = File.open(link)
+    base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/1.html/,"")
     css_pages_path = "#page_switch option"
     pages_html_field = :value
-    params = Website::Parameters.new("https://starkana.me", link, css_pages_path, pages_html_field)
+    params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
     chapter_page = Website::Page.new(params)
-    crawler.get_pages chapter_page, "#pic img"
+    pages = crawler.get_pages chapter_page, "#img"
+    pages.must_equal ["mushroom_risotto.jpg", "vegetable_curry.jpg"]
+  end
+  it "must continue if some link is broken" do
+    link = "test/fixtures/Bleach/chapters/1/broken-1.html"
+    sample_chapter_page = File.open(link)
+    base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/broken-1.html/,"")
+    css_pages_path = "#page_switch option"
+    pages_html_field = :value
+    params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
+    chapter_page = Website::Page.new(params)
+    pages = crawler.get_pages chapter_page, "#img"
+    pages.must_equal [nil, "vegetable_curry.jpg"]
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: manga-crawler
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.2.0
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-04-19 00:00:00.000000000 Z
+date: 2013-04-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -80,6 +80,7 @@ files:
 - test/fixtures/Bleach/bleach.html
 - test/fixtures/Bleach/chapters/1/1.html
 - test/fixtures/Bleach/chapters/1/2.html
+- test/fixtures/Bleach/chapters/1/broken-1.html
 - test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
 - test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
 - test/fixtures/Bleach/chapters/2/1.html
@@ -154,6 +155,7 @@ test_files:
 - test/fixtures/Bleach/bleach.html
 - test/fixtures/Bleach/chapters/1/1.html
 - test/fixtures/Bleach/chapters/1/2.html
+- test/fixtures/Bleach/chapters/1/broken-1.html
 - test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
 - test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
 - test/fixtures/Bleach/chapters/2/1.html