manga-crawler 0.0.2 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/manga-crawler.rb +2 -4
- data/lib/manga-crawler/crawler.rb +21 -49
- data/lib/manga-crawler/version.rb +1 -1
- data/lib/website/page.rb +14 -0
- data/lib/website/parameters.rb +13 -0
- data/manga-crawler.gemspec +1 -1
- data/test/fixtures/Bleach/bleach.html +83 -0
- data/test/fixtures/Bleach/chapters/1/1.html +9 -0
- data/test/fixtures/Bleach/chapters/1/2.html +9 -0
- data/test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg +0 -0
- data/test/fixtures/Bleach/chapters/1/vegetable_curry.jpg +0 -0
- data/test/fixtures/Bleach/chapters/2/1.html +9 -0
- data/test/fixtures/Bleach/chapters/2/2.html +9 -0
- data/test/fixtures/Bleach/chapters/2/angry_birds_cake.jpg +0 -0
- data/test/fixtures/Bleach/chapters/2/thai_shrimp_cake.jpg +0 -0
- data/test/fixtures/Bleach/chapters/3/1.html +9 -0
- data/test/fixtures/Bleach/chapters/3/2.html +9 -0
- data/test/fixtures/Bleach/chapters/3/instant_noodle_with_egg.jpg +0 -0
- data/test/fixtures/Bleach/chapters/3/noodle_with_bbq_pork.jpg +0 -0
- data/test/fixtures/OnePiece/chapters/1/1.html +9 -0
- data/test/fixtures/OnePiece/chapters/1/2.html +9 -0
- data/test/fixtures/OnePiece/chapters/1/full_breakfast.jpg +0 -0
- data/test/fixtures/OnePiece/chapters/1/white_chocolate_donut.jpg +0 -0
- data/test/fixtures/OnePiece/chapters/2/1.html +9 -0
- data/test/fixtures/OnePiece/chapters/2/2.html +9 -0
- data/test/fixtures/OnePiece/chapters/2/green_tea.jpg +0 -0
- data/test/fixtures/OnePiece/chapters/2/ham_and_egg_sandwich.jpg +0 -0
- data/test/fixtures/OnePiece/chapters/3/1.html +9 -0
- data/test/fixtures/OnePiece/chapters/3/2.html +9 -0
- data/test/fixtures/OnePiece/chapters/3/japanese_noodle_with_pork.jpg +0 -0
- data/test/fixtures/OnePiece/chapters/3/starbucks_coffee.jpg +0 -0
- data/test/fixtures/OnePiece/one_piece.html +85 -0
- data/test/fixtures/images/default_bleach.jpg +0 -0
- data/test/fixtures/images/default_naruto.jpg +0 -0
- data/test/fixtures/images/default_onepiece.jpg +0 -0
- data/test/fixtures/index.html +14 -0
- data/test/fixtures/naruto/chapters/1/1.html +9 -0
- data/test/fixtures/naruto/chapters/1/2.html +9 -0
- data/test/{samples/image.jpg → fixtures/naruto/chapters/1/duck.jpg} +0 -0
- data/test/fixtures/naruto/chapters/1/hamburger.jpg +0 -0
- data/test/fixtures/naruto/chapters/2/1.html +9 -0
- data/test/fixtures/naruto/chapters/2/2.html +9 -0
- data/test/fixtures/naruto/chapters/2/egg_benedict.jpg +0 -0
- data/test/fixtures/naruto/chapters/2/ham_and_cheese_panini.jpg +0 -0
- data/test/fixtures/naruto/chapters/3/1.html +9 -0
- data/test/fixtures/naruto/chapters/3/2.html +9 -0
- data/test/fixtures/naruto/chapters/3/japanese_noodle_with_pork.jpg +0 -0
- data/test/fixtures/naruto/chapters/3/starbucks_coffee.jpg +0 -0
- data/test/fixtures/naruto/naruto.html +85 -0
- data/test/lib/manga-crawler/crawler_test.rb +43 -21
- data/test/lib/website/page_test.rb +17 -0
- data/test/lib/website/parameters_test.rb +30 -0
- metadata +95 -11
- data/test/samples/image-page.html +0 -21
- data/test/samples/index-page.html +0 -14
- data/test/samples/manga-page.html +0 -38
data/lib/manga-crawler/crawler.rb
CHANGED
@@ -4,25 +4,18 @@ require 'open-uri'
|
|
4
4
|
module MangaCrawler
|
5
5
|
class Crawler
|
6
6
|
|
7
|
-
|
8
|
-
# manga name and the second position the manga link
|
9
|
-
# Params:
|
10
|
-
# +index_link+:: string with the url containing the index of all mangas
|
11
|
-
# +css_path+:: string of a css path format of the links you want to collect
|
12
|
-
# +css_pagination+:: string with the css path to the next page link
|
13
|
-
# +html_field+:: simbol of the field that has the link
|
14
|
-
def get_mangas index_link, css_path, css_pagination, html_field
|
7
|
+
def get_mangas index_website
|
15
8
|
|
16
9
|
result = Array.new
|
17
10
|
|
18
|
-
html_index = Nokogiri::HTML(open(
|
11
|
+
html_index = Nokogiri::HTML(open(index_website.params.current_url))
|
19
12
|
|
20
13
|
#find all content that matches with the css_path
|
21
|
-
links = html_index.css(css_path)
|
14
|
+
links = html_index.css(index_website.params.css_path)
|
22
15
|
|
23
16
|
#find all content from the anchor nodes found in last search
|
24
17
|
links.each do |anchor|
|
25
|
-
result.push([anchor.content, anchor[html_field]])
|
18
|
+
result.push([anchor.content, anchor[index_website.params.html_field]])
|
26
19
|
end
|
27
20
|
|
28
21
|
#TODO
|
@@ -32,72 +25,51 @@ module MangaCrawler
|
|
32
25
|
return result
|
33
26
|
end
|
34
27
|
|
35
|
-
|
36
|
-
# logic of get_mangas.
|
37
|
-
# Params:
|
38
|
-
# +manga_link+:: string with the url containing the manga_link
|
39
|
-
# +css_path+:: string of the css path format of the links you want to collect
|
40
|
-
# +css_pagination+:: string with the css path to the next page link
|
41
|
-
# +html_field+:: simbol of the field that has the link
|
42
|
-
def get_chapters manga_link, css_path, css_pagination, html_field
|
28
|
+
def get_chapters manga_website
|
43
29
|
#TODO
|
44
30
|
#uses the same logic of get_mangas
|
45
|
-
return get_mangas
|
31
|
+
return get_mangas manga_website
|
46
32
|
end
|
47
33
|
|
48
|
-
|
49
|
-
# methods: 'get_pages_links_from_chapter' and 'get_image_from_page'.
|
50
|
-
# Params:
|
51
|
-
# +chapter_link+:: string with the chapter
|
52
|
-
# +css_pages_path+:: string with the CSS path to the pages links
|
53
|
-
# +pages_html_field+:: HTML field with the page link value
|
54
|
-
# +css_image_path+:: CSS path to the image
|
55
|
-
# +image_html_field+:: HTML field with the direct's image url
|
56
|
-
# +url_base+:: Site´s base url
|
57
|
-
def get_pages chapter_link, css_pages_path, pages_html_field, css_image_path, image_html_field, url_base
|
34
|
+
def get_pages chapter_website, css_image_path
|
58
35
|
|
59
36
|
result = Array.new
|
60
37
|
|
61
|
-
pages_links = get_pages_links_from_chapter
|
38
|
+
pages_links = get_pages_links_from_chapter chapter_website
|
62
39
|
|
63
40
|
pages_links.each do |page|
|
64
|
-
|
41
|
+
|
42
|
+
current_url = chapter_website.params.base_url + page[1]
|
43
|
+
|
44
|
+
params = Website::Parameters.new(chapter_website.params.base_url, current_url, css_image_path, :src)
|
45
|
+
|
46
|
+
result.push( get_image_from_page Website::Page.new(params) )
|
65
47
|
end
|
66
48
|
|
67
49
|
return result
|
68
50
|
|
69
51
|
end
|
70
52
|
|
71
|
-
|
72
|
-
# Params:
|
73
|
-
# +chapter_link+:: Link of the chapter
|
74
|
-
# +css_path+:: CSS path to the block with the pages links
|
75
|
-
# +html_field+:: HTML field that contains the url
|
76
|
-
def get_pages_links_from_chapter chapter_link, css_path, html_field
|
53
|
+
def get_pages_links_from_chapter chapter_website
|
77
54
|
|
78
55
|
result = Array.new
|
79
56
|
|
80
|
-
chapter_page = Nokogiri::HTML(open(
|
57
|
+
chapter_page = Nokogiri::HTML(open(chapter_website.params.current_url))
|
81
58
|
|
82
|
-
pages_links = chapter_page.css(css_path)
|
59
|
+
pages_links = chapter_page.css(chapter_website.params.css_path)
|
83
60
|
|
84
61
|
pages_links.each do |option|
|
85
|
-
result.push([option.content, option[html_field]])
|
62
|
+
result.push([option.content, option[chapter_website.params.html_field]])
|
86
63
|
end
|
87
64
|
|
88
65
|
return result
|
89
66
|
end
|
90
67
|
|
91
|
-
|
92
|
-
# Params:
|
93
|
-
# +page_link+:: HTML page who contains the image
|
94
|
-
# +css_path+:: CSS path to the image
|
95
|
-
# +html_field+:: field that contains the url
|
96
|
-
def get_image_from_page page_link, css_path, html_field
|
68
|
+
def get_image_from_page image_website
|
97
69
|
|
98
|
-
html_image = Nokogiri::HTML(open(
|
70
|
+
html_image = Nokogiri::HTML(open(image_website.params.current_url))
|
99
71
|
|
100
|
-
image_link = html_image.at_css(css_path)[html_field]
|
72
|
+
image_link = html_image.at_css(image_website.params.css_path)[image_website.params.html_field]
|
101
73
|
|
102
74
|
return image_link
|
103
75
|
end
|
data/lib/website/parameters.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
module Website
|
2
|
+
class Parameters
|
3
|
+
|
4
|
+
attr_reader :base_url, :current_url, :css_path, :html_field
|
5
|
+
|
6
|
+
def initialize(base_url, current_url, css_path, html_field)
|
7
|
+
@base_url = base_url
|
8
|
+
@current_url = current_url
|
9
|
+
@css_path = css_path
|
10
|
+
@html_field = html_field
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/manga-crawler.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["kimobr@gmail.com"]
|
11
11
|
spec.description = %q{ A gem that collects mangas from websites}
|
12
12
|
spec.summary = %q{ Retrieve basic manga information }
|
13
|
-
spec.homepage = ""
|
13
|
+
spec.homepage = "https://github.com/thiagokimo/manga-crawler"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
16
16
|
spec.files = `git ls-files`.split($/)
|
@@ -0,0 +1,83 @@
|
|
1
|
+
<html>
|
2
|
+
<head><title>Bleach</title></head>
|
3
|
+
|
4
|
+
<body>
|
5
|
+
<div class="c_h1">Information</div>
|
6
|
+
|
7
|
+
<div>
|
8
|
+
<table width="100%" border="0" cellspacing="0" cellpadding="0">
|
9
|
+
<tr>
|
10
|
+
<td valign="top" style="background: #f0f0f0; padding: 5px; border-right: 1px solid #fff;border-bottom-left-radius: 5px;">
|
11
|
+
<div><a class="olol" title="Bleach" href="#"><img class="a_img" src="../images/default_bleach.jpg" alt="Bleach"/></a></div>
|
12
|
+
|
13
|
+
</td>
|
14
|
+
<td width="100%" style="background: #e0e0e0; border-bottom-right-radius: 5px;" valign="top">
|
15
|
+
<table width="100%" cellspacing="0" cellpadding="0" border="0">
|
16
|
+
<tr class="c_h2">
|
17
|
+
<td width="80" valign="top"><b>Title(s):</b></td>
|
18
|
+
<td>
|
19
|
+
Bleach<br/>
|
20
|
+
</td>
|
21
|
+
</tr>
|
22
|
+
<tr class="c_h2b">
|
23
|
+
<td valign="top"><b>Creator(s):</b></td>
|
24
|
+
<td>
|
25
|
+
<a href="#">Lorem Ipsum</a>
|
26
|
+
</td>
|
27
|
+
</tr>
|
28
|
+
<tr class="c_h2">
|
29
|
+
<td valign="top"><b>Genres:</b></td>
|
30
|
+
<td>
|
31
|
+
<a href="#">Action</a>,
|
32
|
+
<a href="#">Adventure</a>,
|
33
|
+
<a href="#">Anime</a>,
|
34
|
+
<a href="#">Comedy</a>,
|
35
|
+
<a href="#">Drama</a>,
|
36
|
+
<a href="#">Fantasy</a>,
|
37
|
+
<a href="#">Shounen</a>,
|
38
|
+
<a href="#">Supernatural</a>
|
39
|
+
</td>
|
40
|
+
</tr>
|
41
|
+
<tr class="c_h2b">
|
42
|
+
<td valign="top"><b>Start Date:</b></td>
|
43
|
+
<td>
|
44
|
+
2001 </td>
|
45
|
+
</tr>
|
46
|
+
<tr class="c_h2">
|
47
|
+
<td valign="top"><b>Status:</b></td>
|
48
|
+
<td>
|
49
|
+
<span style="color: ">Ongoing</span> </td>
|
50
|
+
</tr>
|
51
|
+
<tr class="c_h2b">
|
52
|
+
<td valign="top" style="border-bottom: 0;"><b>Summary:</b></td>
|
53
|
+
<td style="border-bottom: 0;">
|
54
|
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
55
|
+
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
56
|
+
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
57
|
+
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
58
|
+
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
59
|
+
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.<br />
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
</table>
|
65
|
+
</div>
|
66
|
+
|
67
|
+
<div class="c_h1" style="margin-top:1px;">Chapters</div>
|
68
|
+
|
69
|
+
<div class="episode c_h2">
|
70
|
+
<div><a class="download-link" href="chapters/1/1.html">Bleach <em>chapter</em> <strong>1</strong></a> </div>
|
71
|
+
<div class="clear"></div>
|
72
|
+
</div>
|
73
|
+
<div class="episode c_h2b">
|
74
|
+
<div><a class="download-link" href="chapters/2/1.html">Bleach <em>chapter</em> <strong>2</strong></a> </div>
|
75
|
+
<div class="clear"></div>
|
76
|
+
</div>
|
77
|
+
<div class="episode c_h2">
|
78
|
+
<div><a class="download-link" href="chapters/3/1.html">Bleach <em>chapter</em> <strong>3</strong></a> </div>
|
79
|
+
<div class="clear"></div>
|
80
|
+
</div>
|
81
|
+
|
82
|
+
</body>
|
83
|
+
</html>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="2.html">
|
7
|
+
<img id="img" src="mushroom_risotto.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../2/1.html">
|
7
|
+
<img id="img" src="vegetable_curry.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
Binary file
|
Binary file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="2.html">
|
7
|
+
<img id="img" src="angry_birds_cake.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../3/1.html">
|
7
|
+
<img id="img" src="thai_shrimp_cake.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
Binary file
|
Binary file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../3/2.html">
|
7
|
+
<img id="img" src="instant_noodle_with_egg.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="#">
|
7
|
+
<img id="img" src="noodle_with_bbq_pork.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
Binary file
|
Binary file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="2.html">
|
7
|
+
<img id="img" src="full_breakfast.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../2/1.html">
|
7
|
+
<img id="img" src="white_chocolate_donut.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
Binary file
|
Binary file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="2.html">
|
7
|
+
<img id="img" src="green_tea.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../3/1.html">
|
7
|
+
<img id="img" src="ham_and_egg_sandwich.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
Binary file
|
Binary file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../3/2.html">
|
7
|
+
<img id="img" src="starbucks_coffee.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="#">
|
7
|
+
<img id="img" src="japanese_noodle_with_pork.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
Binary file
|
Binary file
|
@@ -0,0 +1,85 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>One Piece</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
|
7
|
+
<div class="c_h1">Information</div>
|
8
|
+
|
9
|
+
<div>
|
10
|
+
<table width="100%" border="0" cellspacing="0" cellpadding="0">
|
11
|
+
<tr>
|
12
|
+
<td valign="top" style="background: #f0f0f0; padding: 5px; border-right: 1px solid #fff;border-bottom-left-radius: 5px;">
|
13
|
+
<div><a class="olol" title="One Piece" href="#"><img class="a_img" src="../images/default_onepiece.jpg" alt="One Piece"/></a></div>
|
14
|
+
</td>
|
15
|
+
<td width="100%" style="background: #e0e0e0; border-bottom-right-radius: 5px;" valign="top">
|
16
|
+
<table width="100%" cellspacing="0" cellpadding="0" border="0">
|
17
|
+
<tr class="c_h2">
|
18
|
+
<td width="80" valign="top"><b>Title(s):</b></td>
|
19
|
+
<td>
|
20
|
+
One Piece<br/>
|
21
|
+
</td>
|
22
|
+
</tr>
|
23
|
+
<tr class="c_h2b">
|
24
|
+
<td valign="top"><b>Creator(s):</b></td>
|
25
|
+
<td>
|
26
|
+
<a href="/manga/search?c=321">Lorem Ipsum</a>
|
27
|
+
</td>
|
28
|
+
</tr>
|
29
|
+
<tr class="c_h2">
|
30
|
+
<td valign="top"><b>Genres:</b></td>
|
31
|
+
<td>
|
32
|
+
<a href="#">Action</a>,
|
33
|
+
<a href="#">Adventure</a>,
|
34
|
+
<a href="#">Anime</a>,
|
35
|
+
<a href="#">Comedy</a>,
|
36
|
+
<a href="#">Drama</a>,
|
37
|
+
<a href="#">Fantasy</a>,
|
38
|
+
<a href="#">Shounen</a>
|
39
|
+
</td>
|
40
|
+
</tr>
|
41
|
+
<tr class="c_h2b">
|
42
|
+
<td valign="top"><b>Start Date:</b></td>
|
43
|
+
<td>
|
44
|
+
1999 </td>
|
45
|
+
</tr>
|
46
|
+
<tr class="c_h2">
|
47
|
+
<td valign="top"><b>Status:</b></td>
|
48
|
+
<td>
|
49
|
+
<span style="color: ">Ongoing</span> </td>
|
50
|
+
</tr>
|
51
|
+
<tr class="c_h2b">
|
52
|
+
<td valign="top" style="border-bottom: 0;"><b>Summary:</b></td>
|
53
|
+
<td style="border-bottom: 0;">
|
54
|
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
55
|
+
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
56
|
+
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
57
|
+
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
58
|
+
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
59
|
+
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.<br />
|
60
|
+
<br />
|
61
|
+
</td>
|
62
|
+
</tr>
|
63
|
+
</table>
|
64
|
+
</td>
|
65
|
+
</tr>
|
66
|
+
</table>
|
67
|
+
</div>
|
68
|
+
|
69
|
+
<div class="c_h1" style="margin-top:1px;">Chapters</div>
|
70
|
+
|
71
|
+
<div class="episode c_h2">
|
72
|
+
<div><a class="download-link" href="chapters/1/1.html">One Piece <em>chapter</em> <strong>1</strong></a> </div>
|
73
|
+
<div class="clear"></div>
|
74
|
+
</div>
|
75
|
+
<div class="episode c_h2b">
|
76
|
+
<div><a class="download-link" href="chapters/2/1.html">One Piece <em>chapter</em> <strong>2</strong></a> </div>
|
77
|
+
<div class="clear"></div>
|
78
|
+
</div>
|
79
|
+
<div class="episode c_h2">
|
80
|
+
<div><a class="download-link" href="chapters/3/1.html">One Piece <em>chapter</em> <strong>3</strong></a> </div>
|
81
|
+
<div class="clear"></div>
|
82
|
+
</div>
|
83
|
+
|
84
|
+
</body>
|
85
|
+
</html>
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>A sample manga website</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<div class="simple_div">
|
7
|
+
<ul class="simple_div">
|
8
|
+
<li><a href="Naruto/naruto.html">Naruto</a></li>
|
9
|
+
<li><a href="Bleach/bleach.html">Bleach</a></li>
|
10
|
+
<li><a href="OnePiece/one_piece.html">One Piece</a></li>
|
11
|
+
</ul>
|
12
|
+
</div>
|
13
|
+
</body>
|
14
|
+
</html>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="2.html">
|
7
|
+
<img id="img" src="duck.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../2/1.html">
|
7
|
+
<img id="img" src="hamburger.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
File without changes
|
Binary file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="2.html">
|
7
|
+
<img id="img" src="egg_benedict.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../3/1.html">
|
7
|
+
<img id="img" src="ham_and_cheese_panini.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
Binary file
|
Binary file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="../3/2.html">
|
7
|
+
<img id="img" src="starbucks_coffee.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<select id="page_switch" class="bb_drop2" style="border: 1px solid #0b7eb5;">
|
2
|
+
<option selected value="1.html">1</option>
|
3
|
+
<option value="2.html">2</option></select> of <strong>2</strong>
|
4
|
+
|
5
|
+
<div id="imgholder">
|
6
|
+
<a href="#">
|
7
|
+
<img id="img" src="japanese_noodle_with_pork.jpg" alt="An image" name="img"/>
|
8
|
+
</a>
|
9
|
+
</div>
|
Binary file
|
Binary file
|
@@ -0,0 +1,85 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>Naruto</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
|
7
|
+
<div class="c_h1">Information</div>
|
8
|
+
|
9
|
+
<div>
|
10
|
+
<table width="100%" border="0" cellspacing="0" cellpadding="0">
|
11
|
+
<tr>
|
12
|
+
<td valign="top" style="background: #f0f0f0; padding: 5px; border-right: 1px solid #fff;border-bottom-left-radius: 5px;">
|
13
|
+
<div><a class="olol" title="Naruto" href="#"><img class="a_img" src="../images/default_naruto.jpg" alt="Naruto"/></a></div>
|
14
|
+
</td>
|
15
|
+
<td width="100%" style="background: #e0e0e0; border-bottom-right-radius: 5px;" valign="top">
|
16
|
+
<table width="100%" cellspacing="0" cellpadding="0" border="0">
|
17
|
+
<tr class="c_h2">
|
18
|
+
<td width="80" valign="top"><b>Title(s):</b></td>
|
19
|
+
<td>
|
20
|
+
Naruto<br/>
|
21
|
+
</td>
|
22
|
+
</tr>
|
23
|
+
<tr class="c_h2b">
|
24
|
+
<td valign="top"><b>Creator(s):</b></td>
|
25
|
+
<td>
|
26
|
+
<a href="#">Lorem Ipsum</a>
|
27
|
+
</td>
|
28
|
+
</tr>
|
29
|
+
<tr class="c_h2">
|
30
|
+
<td valign="top"><b>Genres:</b></td>
|
31
|
+
<td>
|
32
|
+
<a href="#">Action</a>,
|
33
|
+
<a href="#">Adventure</a>,
|
34
|
+
<a href="#">Anime</a>,
|
35
|
+
<a href="#">Comedy</a>,
|
36
|
+
<a href="#">Drama</a>,
|
37
|
+
<a href="#">Fantasy</a>,
|
38
|
+
<a href="#">Shounen</a>
|
39
|
+
</td>
|
40
|
+
</tr>
|
41
|
+
<tr class="c_h2b">
|
42
|
+
<td valign="top"><b>Start Date:</b></td>
|
43
|
+
<td>
|
44
|
+
1999 </td>
|
45
|
+
</tr>
|
46
|
+
<tr class="c_h2">
|
47
|
+
<td valign="top"><b>Status:</b></td>
|
48
|
+
<td>
|
49
|
+
<span style="color: ">Ongoing</span> </td>
|
50
|
+
</tr>
|
51
|
+
<tr class="c_h2b">
|
52
|
+
<td valign="top" style="border-bottom: 0;"><b>Summary:</b></td>
|
53
|
+
<td style="border-bottom: 0;">
|
54
|
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
55
|
+
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
56
|
+
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
57
|
+
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
58
|
+
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
59
|
+
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.<br />
|
60
|
+
<br />
|
61
|
+
</td>
|
62
|
+
</tr>
|
63
|
+
</table>
|
64
|
+
</td>
|
65
|
+
</tr>
|
66
|
+
</table>
|
67
|
+
</div>
|
68
|
+
|
69
|
+
<div class="c_h1" style="margin-top:1px;">Chapters</div>
|
70
|
+
|
71
|
+
<div class="episode c_h2">
|
72
|
+
<div><a class="download-link" href="chapters/1/1.html">Naruto <em>chapter</em> <strong>1</strong></a> </div>
|
73
|
+
<div class="clear"></div>
|
74
|
+
</div>
|
75
|
+
<div class="episode c_h2b">
|
76
|
+
<div><a class="download-link" href="chapters/2/1.html">Naruto <em>chapter</em> <strong>2</strong></a> </div>
|
77
|
+
<div class="clear"></div>
|
78
|
+
</div>
|
79
|
+
<div class="episode c_h2">
|
80
|
+
<div><a class="download-link" href="chapters/3/1.html">Naruto <em>chapter</em> <strong>3</strong></a> </div>
|
81
|
+
<div class="clear"></div>
|
82
|
+
</div>
|
83
|
+
|
84
|
+
</body>
|
85
|
+
</html>
|
@@ -3,55 +3,77 @@ require "test_helper"
|
|
3
3
|
describe MangaCrawler::Crawler do
|
4
4
|
|
5
5
|
crawler = MangaCrawler::Crawler.new
|
6
|
+
base_url = "localhost"
|
6
7
|
|
7
8
|
it "must retrieve mangas" do
|
8
|
-
|
9
|
-
|
9
|
+
|
10
|
+
sample_index_page = File.open("test/fixtures/index.html")
|
10
11
|
css_path = "a"
|
11
12
|
html_field = :href
|
13
|
+
|
14
|
+
params = Website::Parameters.new(base_url, sample_index_page, css_path, html_field)
|
15
|
+
index_page = Website::Page.new(params)
|
12
16
|
|
13
|
-
mangas = crawler.get_mangas
|
17
|
+
mangas = crawler.get_mangas index_page
|
14
18
|
|
15
|
-
mangas.must_equal [ ["Naruto", "/
|
16
|
-
["Bleach", "/
|
17
|
-
["One Piece", "/
|
19
|
+
mangas.must_equal [ ["Naruto", "Naruto/naruto.html"],
|
20
|
+
["Bleach", "Bleach/bleach.html"],
|
21
|
+
["One Piece", "OnePiece/one_piece.html"] ]
|
18
22
|
end
|
19
23
|
|
20
24
|
it "must retrieve chapters" do
|
21
25
|
|
22
|
-
sample_manga_page = File.open("test/
|
23
|
-
css_path = "
|
26
|
+
sample_manga_page = File.open("test/fixtures/Bleach/bleach.html")
|
27
|
+
css_path = ".download-link"
|
24
28
|
html_field = :href
|
25
29
|
|
26
|
-
|
30
|
+
params = Website::Parameters.new(base_url, sample_manga_page, css_path, html_field)
|
31
|
+
manga_page = Website::Page.new(params)
|
32
|
+
|
33
|
+
chapters = crawler.get_chapters manga_page
|
27
34
|
|
28
|
-
chapters.must_equal [ ["
|
29
|
-
["
|
30
|
-
["
|
35
|
+
chapters.must_equal [ ["Bleach chapter 1", "chapters/1/1.html"],
|
36
|
+
["Bleach chapter 2", "chapters/2/1.html"],
|
37
|
+
["Bleach chapter 3", "chapters/3/1.html"] ]
|
31
38
|
end
|
32
39
|
|
33
40
|
it "must retrieve a direct image link from a page" do
|
34
41
|
|
35
|
-
sample_image_page = File.open("test/
|
42
|
+
sample_image_page = File.open("test/fixtures/naruto/chapters/1/1.html")
|
36
43
|
css_path = "#img"
|
37
44
|
html_field = :src
|
38
45
|
|
39
|
-
|
46
|
+
params = Website::Parameters.new(base_url, sample_image_page, css_path, html_field)
|
47
|
+
image_page = Website::Page.new(params)
|
48
|
+
|
49
|
+
image = crawler.get_image_from_page image_page
|
40
50
|
|
41
|
-
image.must_equal "
|
51
|
+
image.must_equal "duck.jpg"
|
42
52
|
end
|
43
53
|
|
44
54
|
it "must retrieve all pages links from a chapter" do
|
45
55
|
|
46
|
-
sample_image_page = File.open("test/
|
47
|
-
css_path = "#
|
56
|
+
sample_image_page = File.open("test/fixtures/OnePiece/chapters/2/1.html")
|
57
|
+
css_path = "#page_switch option"
|
48
58
|
html_field = :value
|
59
|
+
|
60
|
+
params = Website::Parameters.new(base_url, sample_image_page, css_path, html_field)
|
61
|
+
pages = Website::Page.new(params)
|
49
62
|
|
50
|
-
pages_links = crawler.get_pages_links_from_chapter
|
63
|
+
pages_links = crawler.get_pages_links_from_chapter pages
|
64
|
+
|
65
|
+
pages_links.must_equal [ ["1", "1.html"], ["2", "2.html"] ]
|
66
|
+
|
67
|
+
end
|
68
|
+
|
69
|
+
it "must collect all pages from a given chapter" do
|
70
|
+
link = "https://starkana.me/manga/0/A_Princess_and_a_Bum_(Manhwa)/chapter/7"
|
71
|
+
css_pages_path = "#page_switch option"
|
72
|
+
pages_html_field = :value
|
51
73
|
|
52
|
-
|
53
|
-
|
54
|
-
["3", "/first-manga/1/3"] ]
|
74
|
+
params = Website::Parameters.new("https://starkana.me", link, css_pages_path, pages_html_field)
|
75
|
+
chapter_page = Website::Page.new(params)
|
55
76
|
|
77
|
+
crawler.get_pages chapter_page, "#pic img"
|
56
78
|
end
|
57
79
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
describe Website::Page do
|
4
|
+
|
5
|
+
params = Website::Parameters.new("","","","")
|
6
|
+
page = Website::Page.new(params)
|
7
|
+
|
8
|
+
it "must have parameters" do
|
9
|
+
page.params.wont_be_nil
|
10
|
+
lambda { Website::Page.new(nil) }.must_raise(RuntimeError)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "params must be an instance of Website::Parameters" do
|
14
|
+
lambda { Website::Page.new("invalid params") }.must_raise(RuntimeError)
|
15
|
+
lambda { Website::Page.new(params) }.must_be_silent
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
describe Website::Parameters do
|
4
|
+
|
5
|
+
describe "default attributes" do
|
6
|
+
|
7
|
+
base_url = "lorem.ipsum.dolor"
|
8
|
+
current_url = "/lorem"
|
9
|
+
css_path = "#lorem"
|
10
|
+
html_field = "src"
|
11
|
+
params = Website::Parameters.new(base_url, current_url, css_path, html_field)
|
12
|
+
|
13
|
+
it "must have a base url" do
|
14
|
+
params.base_url.wont_be_nil
|
15
|
+
end
|
16
|
+
|
17
|
+
it "must have a current url" do
|
18
|
+
params.current_url.wont_be_nil
|
19
|
+
end
|
20
|
+
|
21
|
+
it "must have a css path" do
|
22
|
+
params.css_path.wont_be_nil
|
23
|
+
end
|
24
|
+
|
25
|
+
it "must have a html attribute" do
|
26
|
+
params.html_field.wont_be_nil
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: manga-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -74,15 +74,58 @@ files:
|
|
74
74
|
- lib/manga-crawler.rb
|
75
75
|
- lib/manga-crawler/crawler.rb
|
76
76
|
- lib/manga-crawler/version.rb
|
77
|
+
- lib/website/page.rb
|
78
|
+
- lib/website/parameters.rb
|
77
79
|
- manga-crawler.gemspec
|
80
|
+
- test/fixtures/Bleach/bleach.html
|
81
|
+
- test/fixtures/Bleach/chapters/1/1.html
|
82
|
+
- test/fixtures/Bleach/chapters/1/2.html
|
83
|
+
- test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
|
84
|
+
- test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
|
85
|
+
- test/fixtures/Bleach/chapters/2/1.html
|
86
|
+
- test/fixtures/Bleach/chapters/2/2.html
|
87
|
+
- test/fixtures/Bleach/chapters/2/angry_birds_cake.jpg
|
88
|
+
- test/fixtures/Bleach/chapters/2/thai_shrimp_cake.jpg
|
89
|
+
- test/fixtures/Bleach/chapters/3/1.html
|
90
|
+
- test/fixtures/Bleach/chapters/3/2.html
|
91
|
+
- test/fixtures/Bleach/chapters/3/instant_noodle_with_egg.jpg
|
92
|
+
- test/fixtures/Bleach/chapters/3/noodle_with_bbq_pork.jpg
|
93
|
+
- test/fixtures/OnePiece/chapters/1/1.html
|
94
|
+
- test/fixtures/OnePiece/chapters/1/2.html
|
95
|
+
- test/fixtures/OnePiece/chapters/1/full_breakfast.jpg
|
96
|
+
- test/fixtures/OnePiece/chapters/1/white_chocolate_donut.jpg
|
97
|
+
- test/fixtures/OnePiece/chapters/2/1.html
|
98
|
+
- test/fixtures/OnePiece/chapters/2/2.html
|
99
|
+
- test/fixtures/OnePiece/chapters/2/green_tea.jpg
|
100
|
+
- test/fixtures/OnePiece/chapters/2/ham_and_egg_sandwich.jpg
|
101
|
+
- test/fixtures/OnePiece/chapters/3/1.html
|
102
|
+
- test/fixtures/OnePiece/chapters/3/2.html
|
103
|
+
- test/fixtures/OnePiece/chapters/3/japanese_noodle_with_pork.jpg
|
104
|
+
- test/fixtures/OnePiece/chapters/3/starbucks_coffee.jpg
|
105
|
+
- test/fixtures/OnePiece/one_piece.html
|
106
|
+
- test/fixtures/images/default_bleach.jpg
|
107
|
+
- test/fixtures/images/default_naruto.jpg
|
108
|
+
- test/fixtures/images/default_onepiece.jpg
|
109
|
+
- test/fixtures/index.html
|
110
|
+
- test/fixtures/naruto/chapters/1/1.html
|
111
|
+
- test/fixtures/naruto/chapters/1/2.html
|
112
|
+
- test/fixtures/naruto/chapters/1/duck.jpg
|
113
|
+
- test/fixtures/naruto/chapters/1/hamburger.jpg
|
114
|
+
- test/fixtures/naruto/chapters/2/1.html
|
115
|
+
- test/fixtures/naruto/chapters/2/2.html
|
116
|
+
- test/fixtures/naruto/chapters/2/egg_benedict.jpg
|
117
|
+
- test/fixtures/naruto/chapters/2/ham_and_cheese_panini.jpg
|
118
|
+
- test/fixtures/naruto/chapters/3/1.html
|
119
|
+
- test/fixtures/naruto/chapters/3/2.html
|
120
|
+
- test/fixtures/naruto/chapters/3/japanese_noodle_with_pork.jpg
|
121
|
+
- test/fixtures/naruto/chapters/3/starbucks_coffee.jpg
|
122
|
+
- test/fixtures/naruto/naruto.html
|
78
123
|
- test/lib/manga-crawler/crawler_test.rb
|
79
124
|
- test/lib/manga-crawler/version_test.rb
|
80
|
-
- test/
|
81
|
-
- test/
|
82
|
-
- test/samples/index-page.html
|
83
|
-
- test/samples/manga-page.html
|
125
|
+
- test/lib/website/page_test.rb
|
126
|
+
- test/lib/website/parameters_test.rb
|
84
127
|
- test/test_helper.rb
|
85
|
-
homepage:
|
128
|
+
homepage: https://github.com/thiagokimo/manga-crawler
|
86
129
|
licenses:
|
87
130
|
- MIT
|
88
131
|
post_install_message:
|
@@ -108,10 +151,51 @@ signing_key:
|
|
108
151
|
specification_version: 3
|
109
152
|
summary: Retrieve basic manga information
|
110
153
|
test_files:
|
154
|
+
- test/fixtures/Bleach/bleach.html
|
155
|
+
- test/fixtures/Bleach/chapters/1/1.html
|
156
|
+
- test/fixtures/Bleach/chapters/1/2.html
|
157
|
+
- test/fixtures/Bleach/chapters/1/mushroom_risotto.jpg
|
158
|
+
- test/fixtures/Bleach/chapters/1/vegetable_curry.jpg
|
159
|
+
- test/fixtures/Bleach/chapters/2/1.html
|
160
|
+
- test/fixtures/Bleach/chapters/2/2.html
|
161
|
+
- test/fixtures/Bleach/chapters/2/angry_birds_cake.jpg
|
162
|
+
- test/fixtures/Bleach/chapters/2/thai_shrimp_cake.jpg
|
163
|
+
- test/fixtures/Bleach/chapters/3/1.html
|
164
|
+
- test/fixtures/Bleach/chapters/3/2.html
|
165
|
+
- test/fixtures/Bleach/chapters/3/instant_noodle_with_egg.jpg
|
166
|
+
- test/fixtures/Bleach/chapters/3/noodle_with_bbq_pork.jpg
|
167
|
+
- test/fixtures/OnePiece/chapters/1/1.html
|
168
|
+
- test/fixtures/OnePiece/chapters/1/2.html
|
169
|
+
- test/fixtures/OnePiece/chapters/1/full_breakfast.jpg
|
170
|
+
- test/fixtures/OnePiece/chapters/1/white_chocolate_donut.jpg
|
171
|
+
- test/fixtures/OnePiece/chapters/2/1.html
|
172
|
+
- test/fixtures/OnePiece/chapters/2/2.html
|
173
|
+
- test/fixtures/OnePiece/chapters/2/green_tea.jpg
|
174
|
+
- test/fixtures/OnePiece/chapters/2/ham_and_egg_sandwich.jpg
|
175
|
+
- test/fixtures/OnePiece/chapters/3/1.html
|
176
|
+
- test/fixtures/OnePiece/chapters/3/2.html
|
177
|
+
- test/fixtures/OnePiece/chapters/3/japanese_noodle_with_pork.jpg
|
178
|
+
- test/fixtures/OnePiece/chapters/3/starbucks_coffee.jpg
|
179
|
+
- test/fixtures/OnePiece/one_piece.html
|
180
|
+
- test/fixtures/images/default_bleach.jpg
|
181
|
+
- test/fixtures/images/default_naruto.jpg
|
182
|
+
- test/fixtures/images/default_onepiece.jpg
|
183
|
+
- test/fixtures/index.html
|
184
|
+
- test/fixtures/naruto/chapters/1/1.html
|
185
|
+
- test/fixtures/naruto/chapters/1/2.html
|
186
|
+
- test/fixtures/naruto/chapters/1/duck.jpg
|
187
|
+
- test/fixtures/naruto/chapters/1/hamburger.jpg
|
188
|
+
- test/fixtures/naruto/chapters/2/1.html
|
189
|
+
- test/fixtures/naruto/chapters/2/2.html
|
190
|
+
- test/fixtures/naruto/chapters/2/egg_benedict.jpg
|
191
|
+
- test/fixtures/naruto/chapters/2/ham_and_cheese_panini.jpg
|
192
|
+
- test/fixtures/naruto/chapters/3/1.html
|
193
|
+
- test/fixtures/naruto/chapters/3/2.html
|
194
|
+
- test/fixtures/naruto/chapters/3/japanese_noodle_with_pork.jpg
|
195
|
+
- test/fixtures/naruto/chapters/3/starbucks_coffee.jpg
|
196
|
+
- test/fixtures/naruto/naruto.html
|
111
197
|
- test/lib/manga-crawler/crawler_test.rb
|
112
198
|
- test/lib/manga-crawler/version_test.rb
|
113
|
-
- test/
|
114
|
-
- test/
|
115
|
-
- test/samples/index-page.html
|
116
|
-
- test/samples/manga-page.html
|
199
|
+
- test/lib/website/page_test.rb
|
200
|
+
- test/lib/website/parameters_test.rb
|
117
201
|
- test/test_helper.rb
|
@@ -1,21 +0,0 @@
|
|
1
|
-
<html>
|
2
|
-
<head>
|
3
|
-
<title>A sample manga website</title>
|
4
|
-
</head>
|
5
|
-
<body>
|
6
|
-
|
7
|
-
<div id="selectpage">
|
8
|
-
<select id="pageMenu" name="pageMenu">
|
9
|
-
<option value="/first-manga/1/1" selected="selected">1</option>
|
10
|
-
<option value="/first-manga/1/2">2</option>
|
11
|
-
<option value="/first-manga/1/3">3</option>
|
12
|
-
</select>
|
13
|
-
</div>
|
14
|
-
|
15
|
-
<div id="imgholder">
|
16
|
-
<a href="/first-manga/1/2">
|
17
|
-
<img id="img" src="image.jpg" alt="An image" name="img"/>
|
18
|
-
</a>
|
19
|
-
</div>
|
20
|
-
</body>
|
21
|
-
</html>
|
@@ -1,14 +0,0 @@
|
|
1
|
-
<html>
|
2
|
-
<head>
|
3
|
-
<title>A sample manga website</title>
|
4
|
-
</head>
|
5
|
-
<body>
|
6
|
-
<div class="simple_div">
|
7
|
-
<ul class="simple_div">
|
8
|
-
<li><a href="/first-manga">Naruto</a></li>
|
9
|
-
<li><a href="/second-manga">Bleach</a></li>
|
10
|
-
<li><a href="/third-manga">One Piece</a></li>
|
11
|
-
</ul>
|
12
|
-
</div>
|
13
|
-
</body>
|
14
|
-
</html>
|
@@ -1,38 +0,0 @@
|
|
1
|
-
<html>
|
2
|
-
<head>
|
3
|
-
<title>A sample manga website</title>
|
4
|
-
</head>
|
5
|
-
<body>
|
6
|
-
<div id="chapterlist">
|
7
|
-
<table id="listing">
|
8
|
-
|
9
|
-
<tr class="table_head">
|
10
|
-
<th class="leftgap">Chapter Name</th>
|
11
|
-
<th>Date Added</th>
|
12
|
-
</tr>
|
13
|
-
|
14
|
-
<tr>
|
15
|
-
<td>
|
16
|
-
<div class="chico_manga"></div>
|
17
|
-
<a href="/first-manga/1">Chapter 1</a> : First chapter </td>
|
18
|
-
<td>09/09/2011</td>
|
19
|
-
</tr>
|
20
|
-
|
21
|
-
<tr>
|
22
|
-
<td>
|
23
|
-
<div class="chico_manga"></div>
|
24
|
-
<a href="/second-manga/2">Chapter 2</a> : Second chapter</td>
|
25
|
-
<td>09/09/2011</td>
|
26
|
-
</tr>
|
27
|
-
|
28
|
-
<tr>
|
29
|
-
<td>
|
30
|
-
<div class="chico_manga"></div>
|
31
|
-
<a href="/third-manga/3">Chapter 3</a> : Third chapter</td>
|
32
|
-
<td>09/09/2011</td>
|
33
|
-
</tr>
|
34
|
-
|
35
|
-
</table>
|
36
|
-
</div>
|
37
|
-
</body>
|
38
|
-
</html>
|