yamd 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/yamd +23 -0
- data/lib/yamd.rb +124 -0
- data/lib/yamd/mangafox.rb +56 -0
- data/lib/yamd/mangahere.rb +46 -0
- metadata +79 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: af2596743157ae9a1eeb8b4f0d485b662de4dbf9
|
4
|
+
data.tar.gz: 0c678459bf2ae8f667aec151f775d4add6188634
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1488d9483222638626b7b538f2566f0a38074cb0940477f3e76ad74f9d17ba3ec39a4dba59f5a352e17bbae583b682708c9603b72f1d9360cabe5d162ddcccae
|
7
|
+
data.tar.gz: 77fb539ec5ea0e2d84f0148b3e0b8a1132b4549a9eeae3ea4032d7b958707fee2bebb3d325b509935dff537c8ece42229019e77fcbd9f212df5c1424b2f1f3ae
|
data/bin/yamd
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'yamd/mangahere'
|
4
|
+
require 'yamd/mangafox'
|
5
|
+
|
6
|
+
# Command-line entry point: picks the crawler that matches the manga main page
# URL given as first argument and downloads the manga with ImageDownloader
# (which defaults its base directory to the current working directory).

# Without a URL argument there is nothing to download: show the usage and stop.
if ARGV.empty?
  puts 'USAGE: yamd <manga main page url>'
  puts 'EXAMPLE: yamd http://www.mangahere.com/manga/asagao_to_kase_san/'
  puts 'Support mangahere and mangafox sites so far.'
  exit
end

manga_main_page_url = ARGV[0]

# The site is recognized by a plain substring match on the URL; mangafox is
# tested first, as before.
crawler_class =
  case manga_main_page_url
  when /mangafox/ then MangafoxCrawler
  when /mangahere/ then MangahereCrawler
  end

# Stop here for unsupported sites. Previously the message was printed and the
# script went on to call ImageDownloader#download with a nil manga, which
# crashed with NoMethodError. `abort` writes the message to stderr and exits
# with a non-zero status.
unless crawler_class
  abort "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
end

ImageDownloader.new.download(crawler_class.new(manga_main_page_url))
|
23
|
+
|
data/lib/yamd.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'addressable/uri'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
# Base class for one page of a chapter. It only stores what it is given;
# subclasses supply #image_url.
class PageCrawler
  attr_reader :custom_data, :url, :parsed_html, :number, :chapter

  # custom_data:: hash-like object; its :url entry is exposed as #url
  # parsed_html:: parsed document of the page, stored as-is
  # number::      page number supplied by the caller
  # chapter::     chapter object supplied by the caller
  def initialize(custom_data, parsed_html, number, chapter)
    @custom_data, @parsed_html = custom_data, parsed_html
    @number, @chapter = number, chapter
    @url = @custom_data[:url]
  end

  # Abstract: always raises RuntimeError here; subclasses must override it.
  def image_url
    raise 'This method is abstract and have to be defined in a subclass.'
  end
end
|
21
|
+
|
22
|
+
# Base class for a manga chapter. It stores the data handed over by the manga
# crawler and lazily builds the page objects of the chapter. Subclasses must
# define #pages_info, #name and .page_class.
class ChapterCrawler
  attr_reader :custom_data, :url, :parsed_html, :number, :manga

  # custom_data::  hash-like object; its :url entry is exposed as #url
  # chapter_page:: parsed document of the chapter page, exposed as #parsed_html
  # number::       chapter number supplied by the caller
  # manga::        manga object supplied by the caller
  def initialize(custom_data, chapter_page, number, manga)
    @custom_data = custom_data
    @url = custom_data[:url]
    @number = number
    @parsed_html = chapter_page
    @manga = manga
  end

  # Abstract: must return a collection of hashes, each with a :url entry
  # (that entry is what #pages fetches).
  def pages_info
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  # Abstract: name of the chapter.
  def name
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  # Returns an Enumerator that yields one page_class instance per entry of
  # #pages_info, numbered from 1. Each page is fetched and parsed only when
  # the enumerator reaches it.
  def pages
    Enumerator.new do |yielder|
      pages_info.each.with_index(1) do |page_info, number|
        # URI#open (from open-uri) instead of Kernel#open: Kernel#open no
        # longer opens URLs on Ruby >= 3.0 and would run a subprocess for a
        # scraped string starting with "|". The block form closes the handle.
        parsed_html = URI.parse(page_info[:url]).open { |io| Nokogiri::HTML(io) }
        yielder << self.class.page_class.new(page_info, parsed_html, number, self)
      end
    end
  end

  # Abstract: class used by #pages to instantiate each page.
  def self.page_class
    fail 'This method is abstract and have to be defined in a subclass.'
  end
end
|
56
|
+
|
57
|
+
# Base class for a manga. It downloads and parses the manga main page on
# construction and lazily builds the chapter objects. Subclasses must define
# #chapters_info, #name and .chapter_class.
class MangaCrawler
  attr_accessor :url, :parsed_html

  # manga_main_page_url:: URL of the manga main page; it is fetched and parsed
  #                       right away and the result is kept in #parsed_html.
  # Raises URI::InvalidURIError when the argument is not a valid URI.
  def initialize(manga_main_page_url)
    @url = manga_main_page_url
    # URI#open (from open-uri) instead of Kernel#open: Kernel#open no longer
    # opens URLs on Ruby >= 3.0 and would run a subprocess for a string
    # starting with "|". The block form closes the handle after parsing.
    @parsed_html = URI.parse(manga_main_page_url).open { |io| Nokogiri::HTML(io) }
  end

  # Abstract: must return a collection of hashes, each with a :url entry
  # (that entry is what #chapters fetches).
  def chapters_info
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  # Returns an Enumerator that yields one chapter_class instance per entry of
  # #chapters_info, numbered from 1. Each chapter page is fetched and parsed
  # only when the enumerator reaches it.
  def chapters
    Enumerator.new do |yielder|
      chapters_info.each.with_index(1) do |chapter_info, number|
        page = URI.parse(chapter_info[:url]).open { |io| Nokogiri::HTML(io) }
        yielder << self.class.chapter_class.new(chapter_info, page, number, self)
      end
    end
  end

  # Abstract: name of the manga.
  def name
    fail 'This method is abstract and have to be defined in a subclass.'
  end

  # Abstract: class used by #chapters to instantiate each chapter.
  def self.chapter_class
    fail 'This method is abstract and have to be defined in a subclass.'
  end
end
|
88
|
+
|
89
|
+
# Saves every page image of a manga under
# <base_dir>/<manga name>/<chapter name>/<page file>.
class ImageDownloader
  # base_dir:: directory that will receive the manga directory; defaults to
  #            the current working directory.
  def initialize(base_dir = Dir.pwd)
    @base_dir = base_dir
  end

  # Downloads all chapters of +manga+ (an object responding to #name and
  # #chapters). A chapter whose directory already exists is skipped entirely,
  # so an interrupted chapter has to be removed by hand to be fetched again.
  def download(manga)
    manga_dir = Pathname.new(@base_dir).join(manga.name + '/')
    if manga_dir.exist?
      # puts instead of p: p printed the notice wrapped in quotes.
      puts 'Manga dir exists. Skipping each existing chapter. If the script was forced to stop the last downloaded chapter can be incomplete. Remove it to be downloaded again.'
    else
      Dir.mkdir(manga_dir.to_s)
    end

    manga.chapters.each do |chapter|
      chapter_dir = manga_dir.join(chapter.name + '/')
      next if chapter_dir.exist?

      Dir.mkdir(chapter_dir.to_s)
      chapter.pages.each { |page| download_page(page, chapter, manga, chapter_dir) }
    end
  end

  # Builds the file name of a page: the page number zero-padded to four
  # digits followed by the extension found in the path of the image URL.
  # +chapter+ and +manga+ are currently unused.
  def self.format_page_name(page, chapter, manga)
    # TODO: use log10 of chapter.pages.size to determine the zero padding
    page_path = Addressable::URI.parse(page.image_url).path
    format("%04d", page.number) + File.extname(page_path)
  end

  private

  # Writes the image of +page+ into +chapter_dir+ (a Pathname).
  def download_page(page, chapter, manga, chapter_dir)
    page_name = self.class.format_page_name(page, chapter, manga)
    File.open(chapter_dir.join(page_name).to_s, 'wb') do |f|
      # URI#open (from open-uri) instead of Kernel#open: Kernel#open no longer
      # opens URLs on Ruby >= 3.0 and would run a subprocess for a scraped
      # string starting with "|".
      URI.parse(page.image_url).open('rb') { |image| f.write(image.read) }
    end
  end
end
|
124
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'yamd'
|
2
|
+
require 'addressable/uri'
|
3
|
+
|
4
|
+
# Page of a Mangafox chapter.
class MangafoxPage < PageCrawler
  # Returns the src attribute of the first node matching '#viewer a img' in
  # the parsed page.
  def image_url
    image_node = @parsed_html.at_css('#viewer a img')
    image_node['src']
  end
end
|
9
|
+
|
10
|
+
# Chapter of a Mangafox manga.
class MangafoxChapter < ChapterCrawler
  # Class used to instantiate the pages of this chapter.
  def self.page_class
    MangafoxPage
  end

  # Name stored under :name in the data the chapter was created with.
  def name
    @custom_data[:name]
  end

  # Returns an array of { url: ... } hashes, one per page, numbered from 1 up
  # to the largest integer value found among the page selector options. Each
  # URL is the chapter URL with its last path segment replaced by "<n>.html".
  def pages_info
    # Built eagerly: no I/O happens here, so a lazy enumerator is not needed.
    selector = @parsed_html.at_css('.prev_page + div > select')
    last_page = selector.css('option').map { |option| option['value'].to_i }.max

    page_uri = Addressable::URI.parse(url)
    chapter_dir = Pathname.new(page_uri.path).dirname
    (1..last_page).map do |index|
      # The same URI object is reused; only its path changes per page.
      page_uri.path = chapter_dir.join("#{index}.html").to_s
      { url: page_uri.to_s }
    end
  end
end
|
34
|
+
|
35
|
+
# Crawler for a manga hosted on Mangafox.
class MangafoxCrawler < MangaCrawler
  # Class used to instantiate the chapters of this manga.
  def self.chapter_class
    MangafoxChapter
  end

  # Text of the first h1 of the main page up to (not including) " Manga".
  def name
    heading = @parsed_html.at_css('h1').text
    heading.match(/(.+) Manga/)[1]
  end

  # Returns an array of { name:, url: } hashes, one per node matching
  # '#chapters ul li div', in the reverse of the order they appear in the
  # page.
  def chapters_info
    chapter_nodes = @parsed_html.css('#chapters ul li div').reverse
    chapter_nodes.map do |chapter_node|
      link = chapter_node.at_css('a.tips')
      # The real chapter name does not always exist.
      subtitle = chapter_node.at_css('a.tips + span')
      title = link.text
      title = title + ' - ' + subtitle.text if subtitle
      { name: title, url: link['href'] }
    end
  end
end
|
56
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'yamd'
|
2
|
+
|
3
|
+
# Page of a Mangahere chapter.
class MangaherePage < PageCrawler
  # Looks up the first '#viewer a img' node of the parsed page and returns
  # its src attribute.
  def image_url
    parsed_html.at_css('#viewer a img').then { |image| image['src'] }
  end
end
|
8
|
+
|
9
|
+
# Chapter of a Mangahere manga.
class MangahereChapter < ChapterCrawler
  # Class used to instantiate the pages of this chapter.
  def self.page_class
    MangaherePage
  end

  # Name stored under :name in the data the chapter was created with.
  def name
    @custom_data[:name]
  end

  # Returns an array of { url: ... } hashes, one per option of the page
  # selector; the value attribute of each option is used as the URL.
  def pages_info
    # Built eagerly: no I/O happens here, so a lazy enumerator is not needed.
    selector = @parsed_html.at_css('.prew_page + select')
    selector.css('option').map { |option| { url: option['value'] } }
  end
end
|
28
|
+
|
29
|
+
# Crawler for a manga hosted on Mangahere.
class MangahereCrawler < MangaCrawler
  # Class used to instantiate the chapters of this manga.
  def self.chapter_class
    MangahereChapter
  end

  # Stripped text of the first h1 of the main page.
  def name
    heading = @parsed_html.at_css('h1')
    heading.text.strip
  end

  # Returns an array of { name:, url: } hashes, one per link matching
  # '.detail_list ul li a', in the reverse of the order they appear in the
  # page.
  def chapters_info
    links = @parsed_html.css('.detail_list ul li a').reverse
    links.map do |link|
      title = link.text.strip
      { name: title, url: link['href'] }
    end
  end
end
|
46
|
+
|
metadata
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yamd
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Henrique Becker
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-07-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: addressable
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.3'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.3'
|
41
|
+
description: 'This gem offers: classes to subclass and create a manga site crawler;
|
42
|
+
a downloader to use with these classes; some site-specific scripts.'
|
43
|
+
email: henriquebecker91@gmail.com
|
44
|
+
executables:
|
45
|
+
- yamd
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lib/yamd/mangafox.rb
|
50
|
+
- lib/yamd/mangahere.rb
|
51
|
+
- lib/yamd.rb
|
52
|
+
- bin/yamd
|
53
|
+
homepage: http://rubygems.org/gems/yamd
|
54
|
+
licenses:
|
55
|
+
- Public domain
|
56
|
+
metadata: {}
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
requirements: []
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 2.0.3
|
74
|
+
signing_key:
|
75
|
+
specification_version: 4
|
76
|
+
summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writing manga
|
77
|
+
downloaders
|
78
|
+
test_files: []
|
79
|
+
has_rdoc: true
|