manga-crawler 0.0.2 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. data/lib/manga-crawler.rb +2 -4
  2. data/lib/manga-crawler/crawler.rb +21 -49
  3. data/lib/manga-crawler/version.rb +1 -1
  4. data/lib/website/page.rb +14 -0
  5. data/lib/website/parameters.rb +13 -0
  6. data/manga-crawler.gemspec +1 -1
  7. data/test/fixtures/Bleach/bleach.html +83 -0
  8. data/test/fixtures/Bleach/chapters/1/1.html +9 -0
  9. data/test/fixtures/Bleach/chapters/1/2.html +9 -0
  10. data/test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg +0 -0
  11. data/test/fixtures/Bleach/chapters/1/vegetable_curry.jpg +0 -0
  12. data/test/fixtures/Bleach/chapters/2/1.html +9 -0
  13. data/test/fixtures/Bleach/chapters/2/2.html +9 -0
  14. data/test/fixtures/Bleach/chapters/2/angry_birds_cake.jpg +0 -0
  15. data/test/fixtures/Bleach/chapters/2/thai_shrimp_cake.jpg +0 -0
  16. data/test/fixtures/Bleach/chapters/3/1.html +9 -0
  17. data/test/fixtures/Bleach/chapters/3/2.html +9 -0
  18. data/test/fixtures/Bleach/chapters/3/instant_noodle_with_egg.jpg +0 -0
  19. data/test/fixtures/Bleach/chapters/3/noodle_with_bbq_pork.jpg +0 -0
  20. data/test/fixtures/OnePiece/chapters/1/1.html +9 -0
  21. data/test/fixtures/OnePiece/chapters/1/2.html +9 -0
  22. data/test/fixtures/OnePiece/chapters/1/full_breakfast.jpg +0 -0
  23. data/test/fixtures/OnePiece/chapters/1/white_chocolate_donut.jpg +0 -0
  24. data/test/fixtures/OnePiece/chapters/2/1.html +9 -0
  25. data/test/fixtures/OnePiece/chapters/2/2.html +9 -0
  26. data/test/fixtures/OnePiece/chapters/2/green_tea.jpg +0 -0
  27. data/test/fixtures/OnePiece/chapters/2/ham_and_egg_sandwich.jpg +0 -0
  28. data/test/fixtures/OnePiece/chapters/3/1.html +9 -0
  29. data/test/fixtures/OnePiece/chapters/3/2.html +9 -0
  30. data/test/fixtures/OnePiece/chapters/3/japanese_noodle_with_pork.jpg +0 -0
  31. data/test/fixtures/OnePiece/chapters/3/starbucks_coffee.jpg +0 -0
  32. data/test/fixtures/OnePiece/one_piece.html +85 -0
  33. data/test/fixtures/images/default_bleach.jpg +0 -0
  34. data/test/fixtures/images/default_naruto.jpg +0 -0
  35. data/test/fixtures/images/default_onepiece.jpg +0 -0
  36. data/test/fixtures/index.html +14 -0
  37. data/test/fixtures/naruto/chapters/1/1.html +9 -0
  38. data/test/fixtures/naruto/chapters/1/2.html +9 -0
  39. data/test/{samples/image.jpg → fixtures/naruto/chapters/1/duck.jpg} +0 -0
  40. data/test/fixtures/naruto/chapters/1/hamburger.jpg +0 -0
  41. data/test/fixtures/naruto/chapters/2/1.html +9 -0
  42. data/test/fixtures/naruto/chapters/2/2.html +9 -0
  43. data/test/fixtures/naruto/chapters/2/egg_benedict.jpg +0 -0
  44. data/test/fixtures/naruto/chapters/2/ham_and_cheese_panini.jpg +0 -0
  45. data/test/fixtures/naruto/chapters/3/1.html +9 -0
  46. data/test/fixtures/naruto/chapters/3/2.html +9 -0
  47. data/test/fixtures/naruto/chapters/3/japanese_noodle_with_pork.jpg +0 -0
  48. data/test/fixtures/naruto/chapters/3/starbucks_coffee.jpg +0 -0
  49. data/test/fixtures/naruto/naruto.html +85 -0
  50. data/test/lib/manga-crawler/crawler_test.rb +43 -21
  51. data/test/lib/website/page_test.rb +17 -0
  52. data/test/lib/website/parameters_test.rb +30 -0
  53. metadata +95 -11
  54. data/test/samples/image-page.html +0 -21
  55. data/test/samples/index-page.html +0 -14
  56. data/test/samples/manga-page.html +0 -38
@@ -1,6 +1,4 @@
1
1
  require "manga-crawler/version"
2
2
  require "manga-crawler/crawler"
3
-
4
- module MangaCrawler
5
- # Your code goes here...
6
- end
3
+ require "website/parameters"
4
+ require "website/page"
@@ -4,25 +4,18 @@ require 'open-uri'
4
4
  module MangaCrawler
5
5
  class Crawler
6
6
 
7
- # Returns an array of pairs. The first position contains the
8
- # manga name and the second position the manga link
9
- # Params:
10
- # +index_link+:: string with the url containing the index of all mangas
11
- # +css_path+:: string of a css path format of the links you want to collect
12
- # +css_pagination+:: string with the css path to the next page link
13
- # +html_field+:: simbol of the field that has the link
14
- def get_mangas index_link, css_path, css_pagination, html_field
7
+ def get_mangas index_website
15
8
 
16
9
  result = Array.new
17
10
 
18
- html_index = Nokogiri::HTML(open(index_link))
11
+ html_index = Nokogiri::HTML(open(index_website.params.current_url))
19
12
 
20
13
  #find all content that matches with the css_path
21
- links = html_index.css(css_path)
14
+ links = html_index.css(index_website.params.css_path)
22
15
 
23
16
  #find all content from the anchor nodes found in last search
24
17
  links.each do |anchor|
25
- result.push([anchor.content, anchor[html_field]])
18
+ result.push([anchor.content, anchor[index_website.params.html_field]])
26
19
  end
27
20
 
28
21
  #TODO
@@ -32,72 +25,51 @@ module MangaCrawler
32
25
  return result
33
26
  end
34
27
 
35
- # Returns the chapters information of a manga. It uses the same
36
- # logic of get_mangas.
37
- # Params:
38
- # +manga_link+:: string with the url containing the manga_link
39
- # +css_path+:: string of the css path format of the links you want to collect
40
- # +css_pagination+:: string with the css path to the next page link
41
- # +html_field+:: simbol of the field that has the link
42
- def get_chapters manga_link, css_path, css_pagination, html_field
28
+ def get_chapters manga_website
43
29
  #TODO
44
30
  #uses the same logic of get_mangas
45
- return get_mangas manga_link, css_path, css_pagination, html_field
31
+ return get_mangas manga_website
46
32
  end
47
33
 
48
- # Returns the direct links of all pages from a specific chapter. It uses two
49
- # methods: 'get_pages_links_from_chapter' and 'get_image_from_page'.
50
- # Params:
51
- # +chapter_link+:: string with the chapter
52
- # +css_pages_path+:: string with the CSS path to the pages links
53
- # +pages_html_field+:: HTML field with the page link value
54
- # +css_image_path+:: CSS path to the image
55
- # +image_html_field+:: HTML field with the direct's image url
56
- # +url_base+:: Site´s base url
57
- def get_pages chapter_link, css_pages_path, pages_html_field, css_image_path, image_html_field, url_base
34
+ def get_pages chapter_website, css_image_path
58
35
 
59
36
  result = Array.new
60
37
 
61
- pages_links = get_pages_links_from_chapter url_base + chapter_link, css_pages_path, pages_html_field
38
+ pages_links = get_pages_links_from_chapter chapter_website
62
39
 
63
40
  pages_links.each do |page|
64
- result.push( get_image_from_page url_base + page[1], css_image_path, image_html_field )
41
+
42
+ current_url = chapter_website.params.base_url + page[1]
43
+
44
+ params = Website::Parameters.new(chapter_website.params.base_url, current_url, css_image_path, :src)
45
+
46
+ result.push( get_image_from_page Website::Page.new(params) )
65
47
  end
66
48
 
67
49
  return result
68
50
 
69
51
  end
70
52
 
71
- # Returns all pages HTML links from a chapter
72
- # Params:
73
- # +chapter_link+:: Link of the chapter
74
- # +css_path+:: CSS path to the block with the pages links
75
- # +html_field+:: HTML field that contains the url
76
- def get_pages_links_from_chapter chapter_link, css_path, html_field
53
+ def get_pages_links_from_chapter chapter_website
77
54
 
78
55
  result = Array.new
79
56
 
80
- chapter_page = Nokogiri::HTML(open(chapter_link))
57
+ chapter_page = Nokogiri::HTML(open(chapter_website.params.current_url))
81
58
 
82
- pages_links = chapter_page.css(css_path)
59
+ pages_links = chapter_page.css(chapter_website.params.css_path)
83
60
 
84
61
  pages_links.each do |option|
85
- result.push([option.content, option[html_field]])
62
+ result.push([option.content, option[chapter_website.params.html_field]])
86
63
  end
87
64
 
88
65
  return result
89
66
  end
90
67
 
91
- # Returns the image's direct url of a manga page
92
- # Params:
93
- # +page_link+:: HTML page who contains the image
94
- # +css_path+:: CSS path to the image
95
- # +html_field+:: field that contains the url
96
- def get_image_from_page page_link, css_path, html_field
68
+ def get_image_from_page image_website
97
69
 
98
- html_image = Nokogiri::HTML(open(page_link))
70
+ html_image = Nokogiri::HTML(open(image_website.params.current_url))
99
71
 
100
- image_link = html_image.at_css(css_path)[html_field]
72
+ image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
101
73
 
102
74
  return image_link
103
75
  end
@@ -1,3 +1,3 @@
1
1
  module MangaCrawler
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -0,0 +1,14 @@
1
+ module Website
2
+ class Page
3
+
4
+ attr_reader :params
5
+
6
+ def initialize(params)
7
+ if params.instance_of?(Website::Parameters)
8
+ @params = params
9
+ else
10
+ raise "#{params} must be an instance of Website::Parameters"
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,13 @@
1
+ module Website
2
+ class Parameters
3
+
4
+ attr_reader :base_url, :current_url, :css_path, :html_field
5
+
6
+ def initialize(base_url, current_url, css_path, html_field)
7
+ @base_url = base_url
8
+ @current_url = current_url
9
+ @css_path = css_path
10
+ @html_field = html_field
11
+ end
12
+ end
13
+ end
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["kimobr@gmail.com"]
11
11
  spec.description = %q{ A gem that collects mangas from websites}
12
12
  spec.summary = %q{ Retrieve basic manga information }
13
- spec.homepage = ""
13
+ spec.homepage = "https://github.com/thiagokimo/manga-crawler"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -0,0 +1,83 @@
1
+ <html>
2
+ <head><title>Bleach</title></head>
3
+
4
+ <body>
5
+ <div class="c_h1">Information</div>
6
+
7
+ <div>
8
+ <table width="100%" border="0" cellspacing="0" cellpadding="0">
9
+ <tr>
10
+ <td valign="top" style="background: #f0f0f0; padding: 5px; border-right: 1px solid #fff;border-bottom-left-radius: 5px;">
11
+ <div><a class="olol" title="Bleach" href="#"><img class="a_img" src="../images/default_bleach.jpg" alt="Bleach"/></a></div>
12
+
13
+ </td>
14
+ <td width="100%" style="background: #e0e0e0; border-bottom-right-radius: 5px;" valign="top">
15
+ <table width="100%" cellspacing="0" cellpadding="0" border="0">
16
+ <tr class="c_h2">
17
+ <td width="80" valign="top"><b>Title(s):</b></td>
18
+ <td>
19
+ Bleach<br/>
20
+ </td>
21
+ </tr>
22
+ <tr class="c_h2b">
23
+ <td valign="top"><b>Creator(s):</b></td>
24
+ <td>
25
+ <a href="#">Lorem Ipsum</a>
26
+ </td>
27
+ </tr>
28
+ <tr class="c_h2">
29
+ <td valign="top"><b>Genres:</b></td>
30
+ <td>
31
+ <a href="#">Action</a>,
32
+ <a href="#">Adventure</a>,
33
+ <a href="#">Anime</a>,
34
+ <a href="#">Comedy</a>,
35
+ <a href="#">Drama</a>,
36
+ <a href="#">Fantasy</a>,
37
+ <a href="#">Shounen</a>,
38
+ <a href="#">Supernatural</a>
39
+ </td>
40
+ </tr>
41
+ <tr class="c_h2b">
42
+ <td valign="top"><b>Start Date:</b></td>
43
+ <td>
44
+ 2001 </td>
45
+ </tr>
46
+ <tr class="c_h2">
47
+ <td valign="top"><b>Status:</b></td>
48
+ <td>
49
+ <span style="color: ">Ongoing</span> </td>
50
+ </tr>
51
+ <tr class="c_h2b">
52
+ <td valign="top" style="border-bottom: 0;"><b>Summary:</b></td>
53
+ <td style="border-bottom: 0;">
54
+ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
55
+ tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
56
+ quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
57
+ consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
58
+ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
59
+ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.<br />
60
+ </tr>
61
+ </table>
62
+ </td>
63
+ </tr>
64
+ </table>
65
+ </div>
66
+
67
+ <div class="c_h1" style="margin-top:1px;">Chapters</div>
68
+
69
+ <div class="episode c_h2">
70
+ <div><a class="download-link" href="chapters/1/1.html">Bleach <em>chapter</em> <strong>1</strong></a> </div>
71
+ <div class="clear"></div>
72
+ </div>
73
+ <div class="episode c_h2b">
74
+ <div><a class="download-link" href="chapters/2/1.html">Bleach <em>chapter</em> <strong>2</strong></a> </div>
75
+ <div class="clear"></div>
76
+ </div>
77
+ <div class="episode c_h2">
78
+ <div><a class="download-link" href="chapters/3/1.html">Bleach <em>chapter</em> <strong>3</strong></a> </div>
79
+ <div class="clear"></div>
80
+ </div>
81
+
82
+ </body>
83
+ </html>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="2.html">
7
+ <img id="img" src="mushroom_risotto.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../2/1.html">
7
+ <img id="img" src="vegetable_curry.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="2.html">
7
+ <img id="img" src="angry_birds_cake.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../3/1.html">
7
+ <img id="img" src="thai_shrimp_cake.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../3/2.html">
7
+ <img id="img" src="instant_noodle_with_egg.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="#">
7
+ <img id="img" src="noodle_with_bbq_pork.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="2.html">
7
+ <img id="img" src="full_breakfast.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../2/1.html">
7
+ <img id="img" src="white_chocolate_donut.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="2.html">
7
+ <img id="img" src="green_tea.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../3/1.html">
7
+ <img id="img" src="ham_and_egg_sandwich.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../3/2.html">
7
+ <img id="img" src="starbucks_coffee.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="#">
7
+ <img id="img" src="japanese_noodle_with_pork.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,85 @@
1
+ <html>
2
+ <head>
3
+ <title>One Piece</title>
4
+ </head>
5
+ <body>
6
+
7
+ <div class="c_h1">Information</div>
8
+
9
+ <div>
10
+ <table width="100%" border="0" cellspacing="0" cellpadding="0">
11
+ <tr>
12
+ <td valign="top" style="background: #f0f0f0; padding: 5px; border-right: 1px solid #fff;border-bottom-left-radius: 5px;">
13
+ <div><a class="olol" title="One Piece" href="#"><img class="a_img" src="../images/default_onepiece.jpg" alt="One Piece"/></a></div>
14
+ </td>
15
+ <td width="100%" style="background: #e0e0e0; border-bottom-right-radius: 5px;" valign="top">
16
+ <table width="100%" cellspacing="0" cellpadding="0" border="0">
17
+ <tr class="c_h2">
18
+ <td width="80" valign="top"><b>Title(s):</b></td>
19
+ <td>
20
+ One Piece<br/>
21
+ </td>
22
+ </tr>
23
+ <tr class="c_h2b">
24
+ <td valign="top"><b>Creator(s):</b></td>
25
+ <td>
26
+ <a href="/manga/search?c=321">Lorem Ipsum</a>
27
+ </td>
28
+ </tr>
29
+ <tr class="c_h2">
30
+ <td valign="top"><b>Genres:</b></td>
31
+ <td>
32
+ <a href="#">Action</a>,
33
+ <a href="#">Adventure</a>,
34
+ <a href="#">Anime</a>,
35
+ <a href="#">Comedy</a>,
36
+ <a href="#">Drama</a>,
37
+ <a href="#">Fantasy</a>,
38
+ <a href="#">Shounen</a>
39
+ </td>
40
+ </tr>
41
+ <tr class="c_h2b">
42
+ <td valign="top"><b>Start Date:</b></td>
43
+ <td>
44
+ 1999 </td>
45
+ </tr>
46
+ <tr class="c_h2">
47
+ <td valign="top"><b>Status:</b></td>
48
+ <td>
49
+ <span style="color: ">Ongoing</span> </td>
50
+ </tr>
51
+ <tr class="c_h2b">
52
+ <td valign="top" style="border-bottom: 0;"><b>Summary:</b></td>
53
+ <td style="border-bottom: 0;">
54
+ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
55
+ tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
56
+ quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
57
+ consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
58
+ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
59
+ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.<br />
60
+ <br />
61
+ </td>
62
+ </tr>
63
+ </table>
64
+ </td>
65
+ </tr>
66
+ </table>
67
+ </div>
68
+
69
+ <div class="c_h1" style="margin-top:1px;">Chapters</div>
70
+
71
+ <div class="episode c_h2">
72
+ <div><a class="download-link" href="chapters/1/1.html">One Piece <em>chapter</em> <strong>1</strong></a> </div>
73
+ <div class="clear"></div>
74
+ </div>
75
+ <div class="episode c_h2b">
76
+ <div><a class="download-link" href="chapters/2/1.html">One Piece <em>chapter</em> <strong>2</strong></a> </div>
77
+ <div class="clear"></div>
78
+ </div>
79
+ <div class="episode c_h2">
80
+ <div><a class="download-link" href="chapters/3/1.html">One Piece <em>chapter</em> <strong>3</strong></a> </div>
81
+ <div class="clear"></div>
82
+ </div>
83
+
84
+ </body>
85
+ </html>
@@ -0,0 +1,14 @@
1
+ <html>
2
+ <head>
3
+ <title>A sample manga website</title>
4
+ </head>
5
+ <body>
6
+ <div class="simple_div">
7
+ <ul class="simple_div">
8
+ <li><a href="Naruto/naruto.html">Naruto</a></li>
9
+ <li><a href="Bleach/bleach.html">Bleach</a></li>
10
+ <li><a href="OnePiece/one_piece.html">One Piece</a></li>
11
+ </ul>
12
+ </div>
13
+ </body>
14
+ </html>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="2.html">
7
+ <img id="img" src="duck.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../2/1.html">
7
+ <img id="img" src="hamburger.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="2.html">
7
+ <img id="img" src="egg_benedict.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../3/1.html">
7
+ <img id="img" src="ham_and_cheese_panini.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="../3/2.html">
7
+ <img id="img" src="starbucks_coffee.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,9 @@
1
+ <select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
2
+ <option selected value="1.html">1</option>
3
+ <option value="2.html">2</option></select> of <strong>2</strong>
4
+
5
+ <div id="imgholder">
6
+ <a href="#">
7
+ <img id="img" src="japanese_noodle_with_pork.jpg" alt="An image" name="img"/>
8
+ </a>
9
+ </div>
@@ -0,0 +1,85 @@
1
+ <html>
2
+ <head>
3
+ <title>Naruto</title>
4
+ </head>
5
+ <body>
6
+
7
+ <div class="c_h1">Information</div>
8
+
9
+ <div>
10
+ <table width="100%" border="0" cellspacing="0" cellpadding="0">
11
+ <tr>
12
+ <td valign="top" style="background: #f0f0f0; padding: 5px; border-right: 1px solid #fff;border-bottom-left-radius: 5px;">
13
+ <div><a class="olol" title="Naruto" href="#"><img class="a_img" src="../images/default_naruto.jpg" alt="Naruto"/></a></div>
14
+ </td>
15
+ <td width="100%" style="background: #e0e0e0; border-bottom-right-radius: 5px;" valign="top">
16
+ <table width="100%" cellspacing="0" cellpadding="0" border="0">
17
+ <tr class="c_h2">
18
+ <td width="80" valign="top"><b>Title(s):</b></td>
19
+ <td>
20
+ Naruto<br/>
21
+ </td>
22
+ </tr>
23
+ <tr class="c_h2b">
24
+ <td valign="top"><b>Creator(s):</b></td>
25
+ <td>
26
+ <a href="#">Lorem Ipsum</a>
27
+ </td>
28
+ </tr>
29
+ <tr class="c_h2">
30
+ <td valign="top"><b>Genres:</b></td>
31
+ <td>
32
+ <a href="#">Action</a>,
33
+ <a href="#">Adventure</a>,
34
+ <a href="#">Anime</a>,
35
+ <a href="#">Comedy</a>,
36
+ <a href="#">Drama</a>,
37
+ <a href="#">Fantasy</a>,
38
+ <a href="#">Shounen</a>
39
+ </td>
40
+ </tr>
41
+ <tr class="c_h2b">
42
+ <td valign="top"><b>Start Date:</b></td>
43
+ <td>
44
+ 1999 </td>
45
+ </tr>
46
+ <tr class="c_h2">
47
+ <td valign="top"><b>Status:</b></td>
48
+ <td>
49
+ <span style="color: ">Ongoing</span> </td>
50
+ </tr>
51
+ <tr class="c_h2b">
52
+ <td valign="top" style="border-bottom: 0;"><b>Summary:</b></td>
53
+ <td style="border-bottom: 0;">
54
+ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
55
+ tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
56
+ quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
57
+ consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
58
+ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
59
+ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.<br />
60
+ <br />
61
+ </td>
62
+ </tr>
63
+ </table>
64
+ </td>
65
+ </tr>
66
+ </table>
67
+ </div>
68
+
69
+ <div class="c_h1" style="margin-top:1px;">Chapters</div>
70
+
71
+ <div class="episode c_h2">
72
+ <div><a class="download-link" href="chapters/1/1.html">Naruto <em>chapter</em> <strong>1</strong></a> </div>
73
+ <div class="clear"></div>
74
+ </div>
75
+ <div class="episode c_h2b">
76
+ <div><a class="download-link" href="chapters/2/1.html">Naruto <em>chapter</em> <strong>2</strong></a> </div>
77
+ <div class="clear"></div>
78
+ </div>
79
+ <div class="episode c_h2">
80
+ <div><a class="download-link" href="chapters/3/1.html">Naruto <em>chapter</em> <strong>3</strong></a> </div>
81
+ <div class="clear"></div>
82
+ </div>
83
+
84
+ </body>
85
+ </html>
@@ -3,55 +3,77 @@ require "test_helper"
3
3
  describe MangaCrawler::Crawler do
4
4
 
5
5
  crawler = MangaCrawler::Crawler.new
6
+ base_url = "localhost"
6
7
 
7
8
  it "must retrieve mangas" do
8
-
9
- sample_index = File.open("test/samples/index-page.html")
9
+
10
+ sample_index_page = File.open("test/fixtures/index.html")
10
11
  css_path = "a"
11
12
  html_field = :href
13
+
14
+ params = Website::Parameters.new(base_url, sample_index_page, css_path, html_field)
15
+ index_page = Website::Page.new(params)
12
16
 
13
- mangas = crawler.get_mangas sample_index, css_path, nil, html_field
17
+ mangas = crawler.get_mangas index_page
14
18
 
15
- mangas.must_equal [ ["Naruto", "/first-manga"],
16
- ["Bleach", "/second-manga"],
17
- ["One Piece", "/third-manga"] ]
19
+ mangas.must_equal [ ["Naruto", "Naruto/naruto.html"],
20
+ ["Bleach", "Bleach/bleach.html"],
21
+ ["One Piece", "OnePiece/one_piece.html"] ]
18
22
  end
19
23
 
20
24
  it "must retrieve chapters" do
21
25
 
22
- sample_manga_page = File.open("test/samples/manga-page.html")
23
- css_path = "a"
26
+ sample_manga_page = File.open("test/fixtures/Bleach/bleach.html")
27
+ css_path = ".download-link"
24
28
  html_field = :href
25
29
 
26
- chapters = crawler.get_chapters sample_manga_page, css_path, nil, html_field
30
+ params = Website::Parameters.new(base_url, sample_manga_page, css_path, html_field)
31
+ manga_page = Website::Page.new(params)
32
+
33
+ chapters = crawler.get_chapters manga_page
27
34
 
28
- chapters.must_equal [ ["Chapter 1", "/first-manga/1"],
29
- ["Chapter 2", "/second-manga/2"],
30
- ["Chapter 3", "/third-manga/3"] ]
35
+ chapters.must_equal [ ["Bleach chapter 1", "chapters/1/1.html"],
36
+ ["Bleach chapter 2", "chapters/2/1.html"],
37
+ ["Bleach chapter 3", "chapters/3/1.html"] ]
31
38
  end
32
39
 
33
40
  it "must retrieve a direct image link from a page" do
34
41
 
35
- sample_image_page = File.open("test/samples/image-page.html")
42
+ sample_image_page = File.open("test/fixtures/naruto/chapters/1/1.html")
36
43
  css_path = "#img"
37
44
  html_field = :src
38
45
 
39
- image = crawler.get_image_from_page sample_image_page, css_path, html_field
46
+ params = Website::Parameters.new(base_url, sample_image_page, css_path, html_field)
47
+ image_page = Website::Page.new(params)
48
+
49
+ image = crawler.get_image_from_page image_page
40
50
 
41
- image.must_equal "image.jpg"
51
+ image.must_equal "duck.jpg"
42
52
  end
43
53
 
44
54
  it "must retrieve all pages links from a chapter" do
45
55
 
46
- sample_image_page = File.open("test/samples/image-page.html")
47
- css_path = "#pageMenu option"
56
+ sample_image_page = File.open("test/fixtures/OnePiece/chapters/2/1.html")
57
+ css_path = "#page_switch option"
48
58
  html_field = :value
59
+
60
+ params = Website::Parameters.new(base_url, sample_image_page, css_path, html_field)
61
+ pages = Website::Page.new(params)
49
62
 
50
- pages_links = crawler.get_pages_links_from_chapter sample_image_page, css_path, html_field
63
+ pages_links = crawler.get_pages_links_from_chapter pages
64
+
65
+ pages_links.must_equal [ ["1", "1.html"], ["2", "2.html"] ]
66
+
67
+ end
68
+
69
+ it "must collect all pages from a given chapter" do
70
+ link = "https://starkana.me/manga/0/A_Princess_and_a_Bum_(Manhwa)/chapter/7"
71
+ css_pages_path = "#page_switch option"
72
+ pages_html_field = :value
51
73
 
52
- pages_links.must_equal [ ["1", "/first-manga/1/1"],
53
- ["2", "/first-manga/1/2"],
54
- ["3", "/first-manga/1/3"] ]
74
+ params = Website::Parameters.new("https://starkana.me", link, css_pages_path, pages_html_field)
75
+ chapter_page = Website::Page.new(params)
55
76
 
77
+ crawler.get_pages chapter_page, "#pic img"
56
78
  end
57
79
  end
@@ -0,0 +1,17 @@
1
+ require "test_helper"
2
+
3
+ describe Website::Page do
4
+
5
+ params = Website::Parameters.new("","","","")
6
+ page = Website::Page.new(params)
7
+
8
+ it "must have parameters" do
9
+ page.params.wont_be_nil
10
+ lambda { Website::Page.new(nil) }.must_raise(RuntimeError)
11
+ end
12
+
13
+ it "params must be an instance of Website::Parameters" do
14
+ lambda { Website::Page.new("invalid params") }.must_raise(RuntimeError)
15
+ lambda { Website::Page.new(params) }.must_be_silent
16
+ end
17
+ end
@@ -0,0 +1,30 @@
1
+ require "test_helper"
2
+
3
+ describe Website::Parameters do
4
+
5
+ describe "default attributes" do
6
+
7
+ base_url = "lorem.ipsum.dolor"
8
+ current_url = "/lorem"
9
+ css_path = "#lorem"
10
+ html_field = "src"
11
+ params = Website::Parameters.new(base_url, current_url, css_path, html_field)
12
+
13
+ it "must have a base url" do
14
+ params.base_url.wont_be_nil
15
+ end
16
+
17
+ it "must have a current url" do
18
+ params.current_url.wont_be_nil
19
+ end
20
+
21
+ it "must have a css path" do
22
+ params.css_path.wont_be_nil
23
+ end
24
+
25
+ it "must have a html attribute" do
26
+ params.html_field.wont_be_nil
27
+ end
28
+ end
29
+
30
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manga-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-04-16 00:00:00.000000000 Z
12
+ date: 2013-04-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -74,15 +74,58 @@ files:
74
74
  - lib/manga-crawler.rb
75
75
  - lib/manga-crawler/crawler.rb
76
76
  - lib/manga-crawler/version.rb
77
+ - lib/website/page.rb
78
+ - lib/website/parameters.rb
77
79
  - manga-crawler.gemspec
80
+ - test/fixtures/Bleach/bleach.html
81
+ - test/fixtures/Bleach/chapters/1/1.html
82
+ - test/fixtures/Bleach/chapters/1/2.html
83
+ - test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
84
+ - test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
85
+ - test/fixtures/Bleach/chapters/2/1.html
86
+ - test/fixtures/Bleach/chapters/2/2.html
87
+ - test/fixtures/Bleach/chapters/2/angry_birds_cake.jpg
88
+ - test/fixtures/Bleach/chapters/2/thai_shrimp_cake.jpg
89
+ - test/fixtures/Bleach/chapters/3/1.html
90
+ - test/fixtures/Bleach/chapters/3/2.html
91
+ - test/fixtures/Bleach/chapters/3/instant_noodle_with_egg.jpg
92
+ - test/fixtures/Bleach/chapters/3/noodle_with_bbq_pork.jpg
93
+ - test/fixtures/OnePiece/chapters/1/1.html
94
+ - test/fixtures/OnePiece/chapters/1/2.html
95
+ - test/fixtures/OnePiece/chapters/1/full_breakfast.jpg
96
+ - test/fixtures/OnePiece/chapters/1/white_chocolate_donut.jpg
97
+ - test/fixtures/OnePiece/chapters/2/1.html
98
+ - test/fixtures/OnePiece/chapters/2/2.html
99
+ - test/fixtures/OnePiece/chapters/2/green_tea.jpg
100
+ - test/fixtures/OnePiece/chapters/2/ham_and_egg_sandwich.jpg
101
+ - test/fixtures/OnePiece/chapters/3/1.html
102
+ - test/fixtures/OnePiece/chapters/3/2.html
103
+ - test/fixtures/OnePiece/chapters/3/japanese_noodle_with_pork.jpg
104
+ - test/fixtures/OnePiece/chapters/3/starbucks_coffee.jpg
105
+ - test/fixtures/OnePiece/one_piece.html
106
+ - test/fixtures/images/default_bleach.jpg
107
+ - test/fixtures/images/default_naruto.jpg
108
+ - test/fixtures/images/default_onepiece.jpg
109
+ - test/fixtures/index.html
110
+ - test/fixtures/naruto/chapters/1/1.html
111
+ - test/fixtures/naruto/chapters/1/2.html
112
+ - test/fixtures/naruto/chapters/1/duck.jpg
113
+ - test/fixtures/naruto/chapters/1/hamburger.jpg
114
+ - test/fixtures/naruto/chapters/2/1.html
115
+ - test/fixtures/naruto/chapters/2/2.html
116
+ - test/fixtures/naruto/chapters/2/egg_benedict.jpg
117
+ - test/fixtures/naruto/chapters/2/ham_and_cheese_panini.jpg
118
+ - test/fixtures/naruto/chapters/3/1.html
119
+ - test/fixtures/naruto/chapters/3/2.html
120
+ - test/fixtures/naruto/chapters/3/japanese_noodle_with_pork.jpg
121
+ - test/fixtures/naruto/chapters/3/starbucks_coffee.jpg
122
+ - test/fixtures/naruto/naruto.html
78
123
  - test/lib/manga-crawler/crawler_test.rb
79
124
  - test/lib/manga-crawler/version_test.rb
80
- - test/samples/image-page.html
81
- - test/samples/image.jpg
82
- - test/samples/index-page.html
83
- - test/samples/manga-page.html
125
+ - test/lib/website/page_test.rb
126
+ - test/lib/website/parameters_test.rb
84
127
  - test/test_helper.rb
85
- homepage: ''
128
+ homepage: https://github.com/thiagokimo/manga-crawler
86
129
  licenses:
87
130
  - MIT
88
131
  post_install_message:
@@ -108,10 +151,51 @@ signing_key:
108
151
  specification_version: 3
109
152
  summary: Retrieve basic manga information
110
153
  test_files:
154
+ - test/fixtures/Bleach/bleach.html
155
+ - test/fixtures/Bleach/chapters/1/1.html
156
+ - test/fixtures/Bleach/chapters/1/2.html
157
+ - test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
158
+ - test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
159
+ - test/fixtures/Bleach/chapters/2/1.html
160
+ - test/fixtures/Bleach/chapters/2/2.html
161
+ - test/fixtures/Bleach/chapters/2/angry_birds_cake.jpg
162
+ - test/fixtures/Bleach/chapters/2/thai_shrimp_cake.jpg
163
+ - test/fixtures/Bleach/chapters/3/1.html
164
+ - test/fixtures/Bleach/chapters/3/2.html
165
+ - test/fixtures/Bleach/chapters/3/instant_noodle_with_egg.jpg
166
+ - test/fixtures/Bleach/chapters/3/noodle_with_bbq_pork.jpg
167
+ - test/fixtures/OnePiece/chapters/1/1.html
168
+ - test/fixtures/OnePiece/chapters/1/2.html
169
+ - test/fixtures/OnePiece/chapters/1/full_breakfast.jpg
170
+ - test/fixtures/OnePiece/chapters/1/white_chocolate_donut.jpg
171
+ - test/fixtures/OnePiece/chapters/2/1.html
172
+ - test/fixtures/OnePiece/chapters/2/2.html
173
+ - test/fixtures/OnePiece/chapters/2/green_tea.jpg
174
+ - test/fixtures/OnePiece/chapters/2/ham_and_egg_sandwich.jpg
175
+ - test/fixtures/OnePiece/chapters/3/1.html
176
+ - test/fixtures/OnePiece/chapters/3/2.html
177
+ - test/fixtures/OnePiece/chapters/3/japanese_noodle_with_pork.jpg
178
+ - test/fixtures/OnePiece/chapters/3/starbucks_coffee.jpg
179
+ - test/fixtures/OnePiece/one_piece.html
180
+ - test/fixtures/images/default_bleach.jpg
181
+ - test/fixtures/images/default_naruto.jpg
182
+ - test/fixtures/images/default_onepiece.jpg
183
+ - test/fixtures/index.html
184
+ - test/fixtures/naruto/chapters/1/1.html
185
+ - test/fixtures/naruto/chapters/1/2.html
186
+ - test/fixtures/naruto/chapters/1/duck.jpg
187
+ - test/fixtures/naruto/chapters/1/hamburger.jpg
188
+ - test/fixtures/naruto/chapters/2/1.html
189
+ - test/fixtures/naruto/chapters/2/2.html
190
+ - test/fixtures/naruto/chapters/2/egg_benedict.jpg
191
+ - test/fixtures/naruto/chapters/2/ham_and_cheese_panini.jpg
192
+ - test/fixtures/naruto/chapters/3/1.html
193
+ - test/fixtures/naruto/chapters/3/2.html
194
+ - test/fixtures/naruto/chapters/3/japanese_noodle_with_pork.jpg
195
+ - test/fixtures/naruto/chapters/3/starbucks_coffee.jpg
196
+ - test/fixtures/naruto/naruto.html
111
197
  - test/lib/manga-crawler/crawler_test.rb
112
198
  - test/lib/manga-crawler/version_test.rb
113
- - test/samples/image-page.html
114
- - test/samples/image.jpg
115
- - test/samples/index-page.html
116
- - test/samples/manga-page.html
199
+ - test/lib/website/page_test.rb
200
+ - test/lib/website/parameters_test.rb
117
201
  - test/test_helper.rb
@@ -1,21 +0,0 @@
1
- <html>
2
- <head>
3
- <title>A sample manga website</title>
4
- </head>
5
- <body>
6
-
7
- <div id="selectpage">
8
- <select id="pageMenu" name="pageMenu">
9
- <option value="/first-manga/1/1" selected="selected">1</option>
10
- <option value="/first-manga/1/2">2</option>
11
- <option value="/first-manga/1/3">3</option>
12
- </select>
13
- </div>
14
-
15
- <div id="imgholder">
16
- <a href="/first-manga/1/2">
17
- <img id="img" src="image.jpg" alt="An image" name="img"/>
18
- </a>
19
- </div>
20
- </body>
21
- </html>
@@ -1,14 +0,0 @@
1
- <html>
2
- <head>
3
- <title>A sample manga website</title>
4
- </head>
5
- <body>
6
- <div class="simple_div">
7
- <ul class="simple_div">
8
- <li><a href="/first-manga">Naruto</a></li>
9
- <li><a href="/second-manga">Bleach</a></li>
10
- <li><a href="/third-manga">One Piece</a></li>
11
- </ul>
12
- </div>
13
- </body>
14
- </html>
@@ -1,38 +0,0 @@
1
- <html>
2
- <head>
3
- <title>A sample manga website</title>
4
- </head>
5
- <body>
6
- <div id="chapterlist">
7
- <table id="listing">
8
-
9
- <tr class="table_head">
10
- <th class="leftgap">Chapter Name</th>
11
- <th>Date Added</th>
12
- </tr>
13
-
14
- <tr>
15
- <td>
16
- <div class="chico_manga"></div>
17
- <a href="/first-manga/1">Chapter 1</a> : First chapter </td>
18
- <td>09/09/2011</td>
19
- </tr>
20
-
21
- <tr>
22
- <td>
23
- <div class="chico_manga"></div>
24
- <a href="/second-manga/2">Chapter 2</a> : Second chapter</td>
25
- <td>09/09/2011</td>
26
- </tr>
27
-
28
- <tr>
29
- <td>
30
- <div class="chico_manga"></div>
31
- <a href="/third-manga/3">Chapter 3</a> : Third chapter</td>
32
- <td>09/09/2011</td>
33
- </tr>
34
-
35
- </table>
36
- </div>
37
- </body>
38
- </html>