yamd 0.0.1

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: af2596743157ae9a1eeb8b4f0d485b662de4dbf9
+   data.tar.gz: 0c678459bf2ae8f667aec151f775d4add6188634
+ SHA512:
+   metadata.gz: 1488d9483222638626b7b538f2566f0a38074cb0940477f3e76ad74f9d17ba3ec39a4dba59f5a352e17bbae583b682708c9603b72f1d9360cabe5d162ddcccae
+   data.tar.gz: 77fb539ec5ea0e2d84f0148b3e0b8a1132b4549a9eeae3ea4032d7b958707fee2bebb3d325b509935dff537c8ece42229019e77fcbd9f212df5c1424b2f1f3ae
bin/yamd ADDED
@@ -0,0 +1,24 @@
+ #!/usr/bin/ruby
+
+ require 'yamd/mangahere'
+ require 'yamd/mangafox'
+
+ unless ARGV.size > 0
+   puts 'USAGE: yamd <manga main page url>'
+   puts 'EXAMPLE: yamd http://www.mangahere.com/manga/asagao_to_kase_san/'
+   puts 'Supported sites so far: mangahere and mangafox.'
+   exit
+ end
+
+ manga_main_page_url = ARGV[0]
+ if /mangafox/.match(manga_main_page_url)
+   manga = MangafoxCrawler.new(manga_main_page_url)
+ elsif /mangahere/.match(manga_main_page_url)
+   manga = MangahereCrawler.new(manga_main_page_url)
+ else
+   puts "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
+   exit
+ end
+
+ ImageDownloader.new.download(manga)
+
lib/yamd.rb ADDED
@@ -0,0 +1,124 @@
+ require 'nokogiri'
+ require 'open-uri'
+ require 'addressable/uri'
+ require 'pathname'
+
+ class PageCrawler
+   attr_reader :custom_data, :url, :parsed_html, :number, :chapter
+
+   def initialize(custom_data, parsed_html, number, chapter)
+     @custom_data = custom_data
+     @url = custom_data[:url]
+     @parsed_html = parsed_html
+     @number = number
+     @chapter = chapter
+   end
+
+   def image_url
+     fail 'This method is abstract and must be defined in a subclass.'
+   end
+ end
+
+ class ChapterCrawler
+   attr_reader :custom_data, :url, :parsed_html, :number, :manga
+
+   def initialize(custom_data, chapter_page, number, manga)
+     @custom_data = custom_data
+     @url = custom_data[:url]
+     @number = number
+     @parsed_html = chapter_page
+     @manga = manga
+   end
+
+   def pages_info
+     fail 'This method is abstract and must be defined in a subclass.'
+   end
+
+   def name
+     fail 'This method is abstract and must be defined in a subclass.'
+   end
+
+   def pages
+     Enumerator.new do |yielder|
+       number = 1
+       pages_info.each do |page_info|
+         parsed_html = Nokogiri::HTML(open(page_info[:url]))
+         yielder.yield self.class.page_class.new(page_info, parsed_html, number, self)
+         number += 1
+       end
+     end
+   end
+
+   def self.page_class
+     fail 'This method is abstract and must be defined in a subclass.'
+   end
+ end
+
+ class MangaCrawler
+   attr_accessor :url, :parsed_html
+
+   def initialize(manga_main_page_url)
+     @url = manga_main_page_url
+     @parsed_html = Nokogiri::HTML(open(manga_main_page_url))
+   end
+
+   def chapters_info
+     fail 'This method is abstract and must be defined in a subclass.'
+   end
+
+   def chapters
+     Enumerator.new do |yielder|
+       number = 1
+       chapters_info.each do |chapter_info|
+         page = Nokogiri::HTML(open(chapter_info[:url]))
+         yielder.yield self.class.chapter_class.new(chapter_info, page, number, self)
+         number += 1
+       end
+     end
+   end
+
+   def name
+     fail 'This method is abstract and must be defined in a subclass.'
+   end
+
+   def self.chapter_class
+     fail 'This method is abstract and must be defined in a subclass.'
+   end
+ end
+
+ class ImageDownloader
+   def initialize(base_dir = Dir.pwd)
+     @base_dir = base_dir
+   end
+
+   def download(manga)
+     manga_dir = Pathname.new(@base_dir).join(manga.name + '/')
+     if manga_dir.exist?
+       puts 'Manga directory already exists. Existing chapters will be skipped. If a previous run was interrupted, the last downloaded chapter may be incomplete; remove its directory to download it again.'
+     else
+       Dir.mkdir(manga_dir.to_s)
+     end
+     manga.chapters.each do |chapter|
+       chapter_dir = manga_dir.join(chapter.name + '/')
+       unless chapter_dir.exist?
+         Dir.mkdir(chapter_dir.to_s)
+         chapter.pages.each do |page|
+           page_name = self.class.format_page_name(page, chapter, manga)
+           page_abs_path = chapter_dir.join(page_name).to_s
+           File.open(page_abs_path, 'wb') do |f|
+             open(page.image_url, 'rb') do |image|
+               f.write(image.read)
+             end
+           end
+         end
+       end # unless chapter_dir.exist?
+     end
+   end
+
+   def self.format_page_name(page, chapter, manga)
+     # TODO: use log10 of chapter.pages.size to determine the zero padding
+     page_path = Addressable::URI.parse(page.image_url).path
+     format("%04d", page.number) + File.extname(page_path)
+   end
+ end
+
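The three classes above only define the crawler API: every method that raises via fail is meant to be overridden by a site-specific subclass, and page_class / chapter_class tell the base enumerators which subclass to instantiate for each page and chapter. The sketch below is a hypothetical skeleton, not part of the gem (the site name and CSS selectors are invented), showing the minimal set of overrides; the gem's real implementations follow in lib/yamd/mangafox.rb and lib/yamd/mangahere.rb.

# Hypothetical illustration only -- 'ExampleSite' and its selectors are invented.
require 'yamd'

class ExampleSitePage < PageCrawler
  def image_url
    # return the URL of the page's image (assumed selector)
    @parsed_html.at_css('#image')['src']
  end
end

class ExampleSiteChapter < ChapterCrawler
  def self.page_class
    ExampleSitePage
  end

  def pages_info
    # one hash per page; :url is the only key the base class requires
    @parsed_html.css('select.pages option').map { |o| { url: o['value'] } }
  end

  def name
    @custom_data[:name]
  end
end

class ExampleSiteCrawler < MangaCrawler
  def self.chapter_class
    ExampleSiteChapter
  end

  def chapters_info
    # one hash per chapter; extra keys are kept in the chapter's @custom_data
    @parsed_html.css('.chapters a').map { |a| { name: a.text.strip, url: a['href'] } }
  end

  def name
    @parsed_html.at_css('h1').text.strip
  end
end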
lib/yamd/mangafox.rb ADDED
@@ -0,0 +1,56 @@
+ require 'yamd'
+ require 'addressable/uri'
+
+ class MangafoxPage < PageCrawler
+   def image_url
+     @parsed_html.at_css('#viewer a img')['src']
+   end
+ end
+
+ class MangafoxChapter < ChapterCrawler
+   def self.page_class
+     MangafoxPage
+   end
+
+   def pages_info
+     # there is no need for a lazy enumerator here; no IO is performed
+     page_options = @parsed_html.at_css('.prev_page + div > select').css('option')
+     pages_number = page_options.map { |option| option['value'].to_i }.max
+
+     chapter_base_url = Addressable::URI.parse(self.url)
+     chapter_base_path, _ = Pathname.new(chapter_base_url.path).split
+     page_urls = (1..pages_number).to_a.map do |i|
+       chapter_base_url.path = chapter_base_path.join("#{i}.html").to_s
+       { url: chapter_base_url.to_s }
+     end
+
+     page_urls
+   end
+
+   def name
+     @custom_data[:name]
+   end
+ end
+
+ class MangafoxCrawler < MangaCrawler
+   def chapters_info
+     @parsed_html.css('#chapters ul li div').reverse.map do |chapter_link|
+       name = chapter_link.at_css('a.tips').text
+       # the real chapter name does not always exist
+       span_with_real_name = chapter_link.at_css('a.tips + span')
+       name = name + ' - ' + span_with_real_name.text if span_with_real_name
+       { name: name,
+         url: chapter_link.at_css('a.tips')['href']
+       }
+     end
+   end
+
+   def self.chapter_class
+     MangafoxChapter
+   end
+
+   def name
+     @parsed_html.at_css('h1').text.match(/(.+) Manga/)[1]
+   end
+ end
+
lib/yamd/mangahere.rb ADDED
@@ -0,0 +1,46 @@
+ require 'yamd'
+
+ class MangaherePage < PageCrawler
+   def image_url
+     @parsed_html.at_css('#viewer a img')['src']
+   end
+ end
+
+ class MangahereChapter < ChapterCrawler
+   def self.page_class
+     MangaherePage
+   end
+
+   def pages_info
+     # there is no need for a lazy enumerator here; no IO is performed
+     page_options = @parsed_html.at_css('.prew_page + select').css('option')
+     page_urls = []
+     page_options.each do |option|
+       page_urls << { url: option['value'] }
+     end
+     page_urls
+   end
+
+   def name
+     @custom_data[:name]
+   end
+ end
+
+ class MangahereCrawler < MangaCrawler
+   def self.chapter_class
+     MangahereChapter
+   end
+
+   def chapters_info
+     @parsed_html.css('.detail_list ul li a').reverse.map do |chapter_link|
+       { name: chapter_link.text.strip,
+         url: chapter_link['href']
+       }
+     end
+   end
+
+   def name
+     @parsed_html.at_css('h1').text.strip
+   end
+ end
+
metadata ADDED
@@ -0,0 +1,79 @@
+ --- !ruby/object:Gem::Specification
+ name: yamd
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Henrique Becker
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-07-25 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: nokogiri
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.5'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.5'
+ - !ruby/object:Gem::Dependency
+   name: addressable
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '2.3'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '2.3'
+ description: 'This gem offers: classes to subclass and create a manga site crawler;
+   a downloader to use with these classes; some site-specific scripts.'
+ email: henriquebecker91@gmail.com
+ executables:
+ - yamd
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/yamd/mangafox.rb
+ - lib/yamd/mangahere.rb
+ - lib/yamd.rb
+ - bin/yamd
+ homepage: http://rubygems.org/gems/yamd
+ licenses:
+ - Public domain
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.0.3
+ signing_key:
+ specification_version: 4
+ summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writing manga
+   downloaders
+ test_files: []
+ has_rdoc: true
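The gem can also be driven directly from Ruby rather than through bin/yamd. The sketch below mirrors the executable's flow but passes an explicit download directory (ImageDownloader#initialize defaults base_dir to Dir.pwd); the directory path is an assumption for illustration, and the URL is the example from the bin/yamd usage message.

require 'yamd/mangahere'

# Assumed path for illustration; ImageDownloader expects the base directory to exist.
download_dir = '/tmp/manga-downloads'
Dir.mkdir(download_dir) unless Dir.exist?(download_dir)

manga = MangahereCrawler.new('http://www.mangahere.com/manga/asagao_to_kase_san/')
ImageDownloader.new(download_dir).download(manga)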