yamd 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/yamd +23 -0
- data/lib/yamd.rb +124 -0
- data/lib/yamd/mangafox.rb +56 -0
- data/lib/yamd/mangahere.rb +46 -0
- metadata +79 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: af2596743157ae9a1eeb8b4f0d485b662de4dbf9
|
4
|
+
data.tar.gz: 0c678459bf2ae8f667aec151f775d4add6188634
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1488d9483222638626b7b538f2566f0a38074cb0940477f3e76ad74f9d17ba3ec39a4dba59f5a352e17bbae583b682708c9603b72f1d9360cabe5d162ddcccae
|
7
|
+
data.tar.gz: 77fb539ec5ea0e2d84f0148b3e0b8a1132b4549a9eeae3ea4032d7b958707fee2bebb3d325b509935dff537c8ece42229019e77fcbd9f212df5c1424b2f1f3ae
|
data/bin/yamd
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'yamd/mangahere'
|
4
|
+
require 'yamd/mangafox'
|
5
|
+
|
6
|
+
# Command-line entry point: picks the crawler that matches the given URL and
# hands it to ImageDownloader.

# Without a URL there is nothing to do, so show the usage text and stop.
if ARGV.empty?
  puts 'USAGE: yamd <manga main page url>'
  puts 'EXAMPLE: yamd http://www.mangahere.com/manga/asagao_to_kase_san/'
  puts 'Support mangahere and mangafox sites so far.'
  exit
end

manga_main_page_url = ARGV[0]

# The site is recognised by a plain pattern match against the URL text.
manga =
  if /mangafox/.match(manga_main_page_url)
    MangafoxCrawler.new(manga_main_page_url)
  elsif /mangahere/.match(manga_main_page_url)
    MangahereCrawler.new(manga_main_page_url)
  else
    puts "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
    # Stop here with a failure status: falling through would hand nil to the
    # downloader, which then crashes with a NoMethodError.
    exit 1
  end

ImageDownloader.new.download(manga)
|
23
|
+
|
data/lib/yamd.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'addressable/uri'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
# Base class for a single page of a chapter.
#
# It stores the site-specific data describing the page together with the
# parsed HTML of that page; subclasses are expected to supply #image_url.
class PageCrawler
  attr_reader :custom_data, :url, :parsed_html, :number, :chapter

  # custom_data - site-specific data for the page; its :url entry is also
  #               exposed through #url.
  # parsed_html - the parsed document of the page.
  # number      - the number the caller assigned to this page.
  # chapter     - the chapter object this page belongs to.
  def initialize(custom_data, parsed_html, number, chapter)
    @chapter = chapter
    @number = number
    @parsed_html = parsed_html
    @custom_data = custom_data
    @url = @custom_data[:url]
  end

  # Abstract: the base implementation always raises a RuntimeError.
  def image_url
    raise 'This method is abstract and have to be defined in a subclass.'
  end
end
|
21
|
+
|
22
|
+
# Base class for one chapter of a manga.
#
# Subclasses provide .page_class, #pages_info and #name; this class turns the
# page descriptions into page objects, fetching each page only when the
# enumerator returned by #pages is advanced.
class ChapterCrawler
  attr_reader :custom_data, :url, :parsed_html, :number, :manga

  # custom_data  - site-specific data for the chapter; its :url entry is also
  #                exposed through #url.
  # chapter_page - the parsed document of the chapter, exposed as #parsed_html.
  # number       - the number the caller assigned to this chapter.
  # manga        - the manga object this chapter belongs to.
  def initialize(custom_data, chapter_page, number, manga)
    @custom_data = custom_data
    @url = custom_data[:url]
    @number = number
    @parsed_html = chapter_page
    @manga = manga
  end

  # Abstract: must return a collection of hashes, each with a :url entry
  # (that entry is what #pages fetches).
  def pages_info
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  # Abstract: the base implementation always raises a RuntimeError.
  def name
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  # Returns an Enumerator yielding one page object per entry of #pages_info,
  # numbered from 1. Each page is downloaded and parsed only when the
  # enumerator reaches it.
  def pages
    Enumerator.new do |yielder|
      pages_info.each.with_index(1) do |page_info, page_number|
        page_html = fetch_html(page_info[:url])
        yielder.yield self.class.page_class.new(page_info, page_html, page_number, self)
      end
    end
  end

  # Abstract: must return the class used to build the page objects.
  def self.page_class
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  private

  # Downloads page_url and returns the parsed document.
  #
  # The URL is opened through its URI object rather than Kernel#open: the
  # open-uri patch of Kernel#open was removed in Ruby 3.0, and Kernel#open
  # treats a string starting with "|" as a command to run. The block form
  # closes the downloaded stream once it has been parsed.
  def fetch_html(page_url)
    URI.parse(page_url).open { |io| Nokogiri::HTML(io) }
  end
end
|
56
|
+
|
57
|
+
# Base class for a whole manga, built from the URL of its main page.
#
# Subclasses provide .chapter_class, #chapters_info and #name; this class
# turns the chapter descriptions into chapter objects, fetching each chapter
# page only when the enumerator returned by #chapters is advanced.
class MangaCrawler
  attr_accessor :url, :parsed_html

  # manga_main_page_url - URL of the main page; it is downloaded and parsed
  #                       immediately and the result kept in #parsed_html.
  def initialize(manga_main_page_url)
    @url = manga_main_page_url
    @parsed_html = fetch_html(manga_main_page_url)
  end

  # Abstract: must return a collection of hashes, each with a :url entry
  # (that entry is what #chapters fetches).
  def chapters_info
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  # Returns an Enumerator yielding one chapter object per entry of
  # #chapters_info, numbered from 1. Each chapter page is downloaded and
  # parsed only when the enumerator reaches it.
  def chapters
    Enumerator.new do |yielder|
      chapters_info.each.with_index(1) do |chapter_info, chapter_number|
        chapter_html = fetch_html(chapter_info[:url])
        yielder.yield self.class.chapter_class.new(chapter_info, chapter_html, chapter_number, self)
      end
    end
  end

  # Abstract: the base implementation always raises a RuntimeError.
  def name
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  # Abstract: must return the class used to build the chapter objects.
  def self.chapter_class
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  private

  # Downloads page_url and returns the parsed document.
  #
  # The URL is opened through its URI object rather than Kernel#open: the
  # open-uri patch of Kernel#open was removed in Ruby 3.0, and Kernel#open
  # treats a string starting with "|" as a command to run. The block form
  # closes the downloaded stream once it has been parsed.
  def fetch_html(page_url)
    URI.parse(page_url).open { |io| Nokogiri::HTML(io) }
  end
end
|
88
|
+
|
89
|
+
# Saves every page image of a manga to disk, one directory per chapter inside
# a directory named after the manga.
class ImageDownloader
  # base_dir - directory in which the manga directory is created; defaults to
  #            the current working directory.
  def initialize(base_dir = Dir.pwd)
    @base_dir = base_dir
  end

  # Downloads all chapters of manga.
  #
  # manga - an object responding to #name and #chapters; each chapter must
  #         respond to #name and #pages, and each page to #image_url (and
  #         whatever .format_page_name reads).
  #
  # A chapter whose directory already exists is skipped entirely, so a run
  # that was interrupted can be resumed.
  def download(manga)
    manga_dir = Pathname.new(@base_dir).join(manga.name + '/')
    if manga_dir.exist?
      # puts instead of p: p would print the notice wrapped in quotes.
      puts 'Manga dir exists. Skipping each existing chapter. If the script was forced to stop the last downloaded chapter can be incomplete. Remove it to be downloaded again.'
    else
      Dir.mkdir(manga_dir.to_s)
    end

    manga.chapters.each do |chapter|
      chapter_dir = manga_dir.join(chapter.name + '/')
      next if chapter_dir.exist?

      Dir.mkdir(chapter_dir.to_s)
      chapter.pages.each do |page|
        page_name = self.class.format_page_name(page, chapter, manga)
        save_image(page.image_url, chapter_dir.join(page_name).to_s)
      end
    end
  end

  # Builds the file name of a page: its number zero-padded to four digits
  # followed by the extension found in the path of the image URL.
  # chapter and manga are accepted but not used.
  def self.format_page_name(page, chapter, manga)
    # TODO: use log10 of chapter.pages.size to determine the zero padding
    page_path = Addressable::URI.parse(page.image_url).path
    format("%04d", page.number) + File.extname(page_path)
  end

  private

  # Downloads image_url and writes it to the file at destination.
  #
  # The URL is opened through its URI object rather than Kernel#open: the
  # open-uri patch of Kernel#open was removed in Ruby 3.0, and Kernel#open
  # treats a string starting with "|" as a command to run. The remote side is
  # opened first so a failed request does not leave an empty file behind, and
  # the data is streamed instead of being read into memory in one piece.
  def save_image(image_url, destination)
    URI.parse(image_url).open('rb') do |image|
      File.open(destination, 'wb') { |file| IO.copy_stream(image, file) }
    end
  end
end
|
124
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'yamd'
|
2
|
+
require 'addressable/uri'
|
3
|
+
|
4
|
+
# A single page of a Mangafox chapter.
class MangafoxPage < PageCrawler
  # Returns the `src` attribute of the first image matching
  # `#viewer a img` in the parsed page.
  def image_url
    viewer_image = @parsed_html.at_css('#viewer a img')
    viewer_image['src']
  end
end
|
9
|
+
|
10
|
+
# A chapter of a manga hosted on Mangafox.
class MangafoxChapter < ChapterCrawler
  # Pages of this chapter are built as MangafoxPage objects.
  def self.page_class
    MangafoxPage
  end

  # Returns an array of `{ url: ... }` hashes, one per page.
  #
  # The page count is the largest numeric value among the options of the
  # page selector; the page URLs are then formed by replacing the last path
  # segment of the chapter URL with "<n>.html". Everything is computed from
  # the already-parsed chapter page, so no request is made here.
  def pages_info
    options = @parsed_html.at_css('.prev_page + div > select').css('option')
    last_page = options.map { |opt| opt['value'].to_i }.max

    page_uri = Addressable::URI.parse(url)
    chapter_dir = Pathname.new(page_uri.path).dirname

    (1..last_page).to_a.map do |page_number|
      # The same URI object is reused; only its path changes per page.
      page_uri.path = chapter_dir.join("#{page_number}.html").to_s
      { url: page_uri.to_s }
    end
  end

  # The chapter name, taken from the :name entry of the custom data.
  def name
    @custom_data[:name]
  end
end
|
34
|
+
|
35
|
+
# Crawler for the main page of a manga hosted on Mangafox.
class MangafoxCrawler < MangaCrawler
  # Returns an array of `{ name:, url: }` hashes, one per chapter entry found
  # under `#chapters ul li div`, in the reverse of the order they appear on
  # the page (presumably the page lists the newest chapter first -- confirm).
  def chapters_info
    entries = @parsed_html.css('#chapters ul li div').reverse
    entries.map do |entry|
      link = entry.at_css('a.tips')
      title = link.text
      # The span carrying the real chapter name is not always present.
      real_name = entry.at_css('a.tips + span')
      title = "#{title} - #{real_name.text}" if real_name
      { name: title, url: link['href'] }
    end
  end

  # Chapters of this manga are built as MangafoxChapter objects.
  def self.chapter_class
    MangafoxChapter
  end

  # The manga name: the part of the first <h1> text that precedes " Manga".
  def name
    heading = @parsed_html.at_css('h1').text
    heading.match(/(.+) Manga/)[1]
  end
end
|
56
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'yamd'
|
2
|
+
|
3
|
+
# A single page of a Mangahere chapter.
class MangaherePage < PageCrawler
  # Returns the `src` attribute of the first image matching
  # `#viewer a img` in the parsed page.
  def image_url
    viewer_image = @parsed_html.at_css('#viewer a img')
    viewer_image['src']
  end
end
|
8
|
+
|
9
|
+
# A chapter of a manga hosted on Mangahere.
class MangahereChapter < ChapterCrawler
  # Pages of this chapter are built as MangaherePage objects.
  def self.page_class
    MangaherePage
  end

  # Returns an array of `{ url: ... }` hashes, one per option of the page
  # selector, using each option's value as the page URL. Everything is read
  # from the already-parsed chapter page, so no request is made here.
  def pages_info
    page_selector = @parsed_html.at_css('.prew_page + select')
    page_selector.css('option').map { |opt| { url: opt['value'] } }
  end

  # The chapter name, taken from the :name entry of the custom data.
  def name
    @custom_data[:name]
  end
end
|
28
|
+
|
29
|
+
# Crawler for the main page of a manga hosted on Mangahere.
class MangahereCrawler < MangaCrawler
  # Chapters of this manga are built as MangahereChapter objects.
  def self.chapter_class
    MangahereChapter
  end

  # Returns an array of `{ name:, url: }` hashes, one per link found under
  # `.detail_list ul li a`, in the reverse of the order they appear on the
  # page (presumably the page lists the newest chapter first -- confirm).
  def chapters_info
    links = @parsed_html.css('.detail_list ul li a').reverse
    links.map do |link|
      chapter_title = link.text.strip
      { name: chapter_title, url: link['href'] }
    end
  end

  # The manga name: the text of the first <h1>, without surrounding whitespace.
  def name
    heading = @parsed_html.at_css('h1')
    heading.text.strip
  end
end
|
46
|
+
|
metadata
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yamd
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Henrique Becker
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-07-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: addressable
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.3'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.3'
|
41
|
+
description: 'This gem offers: classes to subclass and create a manga site crawler;
|
42
|
+
a downloader to use with these classes; some site-specific scripts.'
|
43
|
+
email: henriquebecker91@gmail.com
|
44
|
+
executables:
|
45
|
+
- yamd
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lib/yamd/mangafox.rb
|
50
|
+
- lib/yamd/mangahere.rb
|
51
|
+
- lib/yamd.rb
|
52
|
+
- bin/yamd
|
53
|
+
homepage: http://rubygems.org/gems/yamd
|
54
|
+
licenses:
|
55
|
+
- Public domain
|
56
|
+
metadata: {}
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
requirements: []
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 2.0.3
|
74
|
+
signing_key:
|
75
|
+
specification_version: 4
|
76
|
+
summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writing manga
|
77
|
+
downloaders
|
78
|
+
test_files: []
|
79
|
+
has_rdoc: true
|