manga-crawler 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,7 +36,7 @@ module MangaCrawler
36
36
  def get_chapters manga_website
37
37
  #TODO
38
38
  #uses the same logic of get_mangas
39
- return get_mangas manga_website
39
+ return self.get_mangas manga_website
40
40
  end
41
41
 
42
42
  def get_pages chapter_website, css_image_path
@@ -53,12 +53,12 @@ module MangaCrawler
53
53
 
54
54
  params = Website::Parameters.new(chapter_website.params.base_url, current_url, css_image_path, :src)
55
55
 
56
- result.push( get_image_from_page Website::Page.new(params) )
56
+ result.push( self.get_image_from_page Website::Page.new(params) )
57
57
  end
58
58
 
59
59
  end_time = Time.now
60
60
 
61
- puts "\mCollect pages completed!"
61
+ puts "\nCollect pages completed!"
62
62
  puts "Elapsed time: #{end_time-start_time} seconds."
63
63
 
64
64
  return result
@@ -82,9 +82,13 @@ module MangaCrawler
82
82
 
83
83
  def get_image_from_page image_website
84
84
 
85
- html_image = Nokogiri::HTML(open(image_website.params.current_url))
85
+ begin
86
+ html_image = Nokogiri::HTML(open(image_website.params.current_url))
86
87
 
87
- image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
88
+ image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
89
+ rescue Exception => e
90
+ p "Error trying to access: #{image_website.params.current_url}"
91
+ end
88
92
 
89
93
  return image_link
90
94
  end
@@ -1,3 +1,3 @@
1
1
  module MangaCrawler
2
- VERSION = "0.1.2"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,6 +1,6 @@
1
1
  <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
- <option selected value="1.html">1</option>
3
- <option value="2.html">2</option></select> of <strong>2</strong>
2
+ <option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
3
+ <option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
4
4
 
5
5
  <div id="imgholder">
6
6
  <a href="2.html">
@@ -1,6 +1,6 @@
1
1
  <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
- <option selected value="1.html">1</option>
3
- <option value="2.html">2</option></select> of <strong>2</strong>
2
+ <option selected value="test/fixtures/Bleach/chapters/1/1.html">1</option>
3
+ <option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
4
4
 
5
5
  <div id="imgholder">
6
6
  <a href="../2/1.html">
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="test/fixtures/Bleach/chapters/1/broken.html">1</option>
3
+ <option value="test/fixtures/Bleach/chapters/1/2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="2.html">
7
+ <img id="img" src="mushroom_risotto.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -67,13 +67,38 @@ describe MangaCrawler::Crawler do
67
67
  end
68
68
 
69
69
  it "must collect all pages from a given chapter" do
70
- link = "https://starkana.me/manga/0/A_Princess_and_a_Bum_(Manhwa)/chapter/7"
70
+
71
+ link = "test/fixtures/Bleach/chapters/1/1.html"
72
+ sample_chapter_page = File.open(link)
73
+
74
+ base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/1.html/,"")
75
+
71
76
  css_pages_path = "#page_switch option"
72
77
  pages_html_field = :value
73
78
 
74
- params = Website::Parameters.new("https://starkana.me", link, css_pages_path, pages_html_field)
79
+ params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
75
80
  chapter_page = Website::Page.new(params)
76
81
 
77
- crawler.get_pages chapter_page, "#pic img"
82
+ pages = crawler.get_pages chapter_page, "#img"
83
+
84
+ pages.must_equal ["mushroom_risotto.jpg", "vegetable_curry.jpg"]
85
+ end
86
+
87
+ it "must continue if some link is broken" do
88
+
89
+ link = "test/fixtures/Bleach/chapters/1/broken-1.html"
90
+ sample_chapter_page = File.open(link)
91
+
92
+ base_url = File.absolute_path(sample_chapter_page).gsub(/test\/fixtures\/Bleach\/chapters\/1\/broken-1.html/,"")
93
+
94
+ css_pages_path = "#page_switch option"
95
+ pages_html_field = :value
96
+
97
+ params = Website::Parameters.new(base_url, link, css_pages_path, pages_html_field)
98
+ chapter_page = Website::Page.new(params)
99
+
100
+ pages = crawler.get_pages chapter_page, "#img"
101
+
102
+ pages.must_equal [nil, "vegetable_curry.jpg"]
78
103
  end
79
104
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manga-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-04-19 00:00:00.000000000 Z
12
+ date: 2013-04-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -80,6 +80,7 @@ files:
80
80
  - test/fixtures/Bleach/bleach.html
81
81
  - test/fixtures/Bleach/chapters/1/1.html
82
82
  - test/fixtures/Bleach/chapters/1/2.html
83
+ - test/fixtures/Bleach/chapters/1/broken-1.html
83
84
  - test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
84
85
  - test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
85
86
  - test/fixtures/Bleach/chapters/2/1.html
@@ -154,6 +155,7 @@ test_files:
154
155
  - test/fixtures/Bleach/bleach.html
155
156
  - test/fixtures/Bleach/chapters/1/1.html
156
157
  - test/fixtures/Bleach/chapters/1/2.html
158
+ - test/fixtures/Bleach/chapters/1/broken-1.html
157
159
  - test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
158
160
  - test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
159
161
  - test/fixtures/Bleach/chapters/2/1.html