yamd 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: af2596743157ae9a1eeb8b4f0d485b662de4dbf9
+   data.tar.gz: 0c678459bf2ae8f667aec151f775d4add6188634
+ SHA512:
+   metadata.gz: 1488d9483222638626b7b538f2566f0a38074cb0940477f3e76ad74f9d17ba3ec39a4dba59f5a352e17bbae583b682708c9603b72f1d9360cabe5d162ddcccae
+   data.tar.gz: 77fb539ec5ea0e2d84f0148b3e0b8a1132b4549a9eeae3ea4032d7b958707fee2bebb3d325b509935dff537c8ece42229019e77fcbd9f212df5c1424b2f1f3ae
bin/yamd ADDED
@@ -0,0 +1,23 @@
+ #!/usr/bin/env ruby
+ 
+ require 'yamd/mangahere'
+ require 'yamd/mangafox'
+ 
+ unless ARGV.size > 0
+   puts 'USAGE: yamd <manga main page url>'
+   puts 'EXAMPLE: yamd http://www.mangahere.com/manga/asagao_to_kase_san/'
+   puts 'Supports the mangahere and mangafox sites so far.'
+   exit
+ end
+ 
+ manga_main_page_url = ARGV[0]
+ if /mangafox/.match(manga_main_page_url)
+   manga = MangafoxCrawler.new(manga_main_page_url)
+ elsif /mangahere/.match(manga_main_page_url)
+   manga = MangahereCrawler.new(manga_main_page_url)
+ else
+   abort "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
+ end
+ 
+ ImageDownloader.new.download(manga)
+ 
lib/yamd.rb ADDED
@@ -0,0 +1,124 @@
+ require 'nokogiri'
+ require 'open-uri'
+ require 'addressable/uri'
+ require 'pathname'
+ 
+ class PageCrawler
+   attr_reader :custom_data, :url, :parsed_html, :number, :chapter
+ 
+   def initialize(custom_data, parsed_html, number, chapter)
+     @custom_data = custom_data
+     @url = custom_data[:url]
+     @parsed_html = parsed_html
+     @number = number
+     @chapter = chapter
+   end
+ 
+   def image_url
+     fail 'This method is abstract and has to be defined in a subclass.'
+   end
+ end
+ 
+ class ChapterCrawler
+   attr_reader :custom_data, :url, :parsed_html, :number, :manga
+ 
+   def initialize(custom_data, chapter_page, number, manga)
+     @custom_data = custom_data
+     @url = custom_data[:url]
+     @number = number
+     @parsed_html = chapter_page
+     @manga = manga
+   end
+ 
+   def pages_info
+     fail 'This method is abstract and has to be defined in a subclass.'
+   end
+ 
+   def name
+     fail 'This method is abstract and has to be defined in a subclass.'
+   end
+ 
+   def pages
+     Enumerator.new do | yielder |
+       number = 1
+       pages_info.each do | page_info |
+         parsed_html = Nokogiri::HTML(open(page_info[:url]))
+         yielder.yield self.class.page_class.new(page_info, parsed_html, number, self)
+         number += 1
+       end
+     end
+   end
+ 
+   def self.page_class
+     fail 'This method is abstract and has to be defined in a subclass.'
+   end
+ end
+ 
+ class MangaCrawler
+   attr_accessor :url, :parsed_html
+ 
+   def initialize(manga_main_page_url)
+     @url = manga_main_page_url
+     @parsed_html = Nokogiri::HTML(open(manga_main_page_url))
+   end
+ 
+   def chapters_info
+     fail 'This method is abstract and has to be defined in a subclass.'
+   end
+ 
+   def chapters
+     Enumerator.new do | yielder |
+       number = 1
+       chapters_info.each do | chapter_info |
+         page = Nokogiri::HTML(open(chapter_info[:url]))
+         yielder.yield self.class.chapter_class.new(chapter_info, page, number, self)
+         number += 1
+       end
+     end
+   end
+ 
+   def name
+     fail 'This method is abstract and has to be defined in a subclass.'
+   end
+ 
+   def self.chapter_class
+     fail 'This method is abstract and has to be defined in a subclass.'
+   end
+ end
+ 
+ class ImageDownloader
+   def initialize(base_dir = Dir.pwd)
+     @base_dir = base_dir
+   end
+ 
+   def download(manga)
+     manga_dir = Pathname.new(@base_dir).join(manga.name + '/')
+     if manga_dir.exist?
+       puts 'Manga dir exists. Skipping every existing chapter. If the script was forced to stop, the last downloaded chapter may be incomplete; remove it to download it again.'
+     else
+       Dir.mkdir(manga_dir.to_s)
+     end
+     manga.chapters.each do | chapter |
+       chapter_dir = manga_dir.join(chapter.name + '/')
+       unless chapter_dir.exist?
+         Dir.mkdir(chapter_dir.to_s)
+         chapter.pages.each do | page |
+           page_name = self.class.format_page_name(page, chapter, manga)
+           page_abs_path = chapter_dir.join(page_name).to_s
+           File.open(page_abs_path, 'wb') do | f |
+             open(page.image_url, 'rb') do | image |
+               f.write(image.read)
+             end
+           end
+         end
+       end # end "unless chapter_dir.exist?"
+     end
+   end
+ 
+   def self.format_page_name(page, chapter, manga)
+     # TODO: use log10 of chapter.pages.size to determine the zero padding
+     page_path = Addressable::URI.parse(page.image_url).path
+     format("%04d", page.number) + File.extname(page_path)
+   end
+ end
+ 
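The chapters and pages methods above wrap their work in Enumerator.new, so chapter and page HTML is only fetched as the caller consumes the enumerator (the "lazy interface" the gem summary below refers to). A minimal sketch of what that buys, reusing the MangahereCrawler defined further down and the example URL from bin/yamd (illustration only, not part of the package):

    require 'yamd/mangahere'

    # The manga main page is fetched and parsed right away by MangaCrawler#initialize...
    manga = MangahereCrawler.new('http://www.mangahere.com/manga/asagao_to_kase_san/')

    # ...but only the first two chapter pages are opened here: Enumerable#first(2)
    # pulls just two items out of the Enumerator built by MangaCrawler#chapters.
    manga.chapters.first(2).each do |chapter|
      # Likewise, only the first page of each chapter is fetched to read its image URL.
      puts "#{chapter.number}: #{chapter.name} -> #{chapter.pages.first.image_url}"
    end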
lib/yamd/mangafox.rb ADDED
@@ -0,0 +1,56 @@
+ require 'yamd'
+ require 'addressable/uri'
+ 
+ class MangafoxPage < PageCrawler
+   def image_url
+     @parsed_html.at_css('#viewer a img')['src']
+   end
+ end
+ 
+ class MangafoxChapter < ChapterCrawler
+   def self.page_class
+     MangafoxPage
+   end
+ 
+   def pages_info
+     # There's no need for a lazy enumerator here; no IO is performed.
+     page_options = @parsed_html.at_css('.prev_page + div > select').css('option')
+     pages_number = page_options.map { | option | option['value'].to_i }.max
+ 
+     chapter_base_url = Addressable::URI.parse(self.url)
+     chapter_base_path, _ = Pathname.new(chapter_base_url.path).split
+     page_urls = (1..pages_number).to_a.map do | i |
+       chapter_base_url.path = chapter_base_path.join("#{i}.html").to_s
+       { url: chapter_base_url.to_s }
+     end
+ 
+     page_urls
+   end
+ 
+   def name
+     @custom_data[:name]
+   end
+ end
+ 
+ class MangafoxCrawler < MangaCrawler
+   def chapters_info
+     @parsed_html.css('#chapters ul li div').reverse.map do | chapter_link |
+       name = chapter_link.at_css('a.tips').text
+       # The real chapter name doesn't always exist.
+       span_with_real_name = chapter_link.at_css('a.tips + span')
+       name = name + ' - ' + span_with_real_name.text if span_with_real_name
+       { name: name,
+         url: chapter_link.at_css('a.tips')['href']
+       }
+     end
+   end
+ 
+   def self.chapter_class
+     MangafoxChapter
+   end
+ 
+   def name
+     @parsed_html.at_css('h1').text.match(/(.+) Manga/)[1]
+   end
+ end
+ 
lib/yamd/mangahere.rb ADDED
@@ -0,0 +1,46 @@
+ require 'yamd'
+ 
+ class MangaherePage < PageCrawler
+   def image_url
+     @parsed_html.at_css('#viewer a img')['src']
+   end
+ end
+ 
+ class MangahereChapter < ChapterCrawler
+   def self.page_class
+     MangaherePage
+   end
+ 
+   def pages_info
+     # There's no need for a lazy enumerator here; no IO is performed.
+     page_options = @parsed_html.at_css('.prew_page + select').css('option')
+     page_urls = []
+     page_options.each do | option |
+       page_urls << { url: option['value'] }
+     end
+     page_urls
+   end
+ 
+   def name
+     @custom_data[:name]
+   end
+ end
+ 
+ class MangahereCrawler < MangaCrawler
+   def self.chapter_class
+     MangahereChapter
+   end
+ 
+   def chapters_info
+     @parsed_html.css('.detail_list ul li a').reverse.map do | chapter_link |
+       { name: chapter_link.text.strip,
+         url: chapter_link['href']
+       }
+     end
+   end
+ 
+   def name
+     @parsed_html.at_css('h1').text.strip
+   end
+ end
+ 
metadata ADDED
@@ -0,0 +1,79 @@
+ --- !ruby/object:Gem::Specification
+ name: yamd
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Henrique Becker
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-07-25 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: nokogiri
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.5'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.5'
+ - !ruby/object:Gem::Dependency
+   name: addressable
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '2.3'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '2.3'
+ description: 'This gem offers: classes to subclass and create a manga site crawler;
+   a downloader to use with these classes; some site-specific scripts.'
+ email: henriquebecker91@gmail.com
+ executables:
+ - yamd
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/yamd/mangafox.rb
+ - lib/yamd/mangahere.rb
+ - lib/yamd.rb
+ - bin/yamd
+ homepage: http://rubygems.org/gems/yamd
+ licenses:
+ - Public domain
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.0.3
+ signing_key:
+ specification_version: 4
+ summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writing manga
+   downloaders
+ test_files: []
+ has_rdoc: true
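The description above says the gem's core offer is a set of classes to subclass when adding a new site. As an illustration only (the site, the CSS selectors, and the Examplemanga* class names below are hypothetical, not part of the package), a new backend would follow the same pattern as lib/yamd/mangafox.rb and lib/yamd/mangahere.rb:

    require 'yamd'

    class ExamplemangaPage < PageCrawler
      def image_url
        # Hypothetical selector; return the URL of the scan image on the page.
        @parsed_html.at_css('#image')['src']
      end
    end

    class ExamplemangaChapter < ChapterCrawler
      def self.page_class
        ExamplemangaPage
      end

      def pages_info
        # One hash per page; :url is the only key the base PageCrawler requires.
        @parsed_html.css('select.page-list option').map { |o| { url: o['value'] } }
      end

      def name
        @custom_data[:name]
      end
    end

    class ExamplemangaCrawler < MangaCrawler
      def self.chapter_class
        ExamplemangaChapter
      end

      def chapters_info
        # Oldest chapter first, as the real backends do with #reverse.
        @parsed_html.css('ul.chapters a').reverse.map do |a|
          { name: a.text.strip, url: a['href'] }
        end
      end

      def name
        @parsed_html.at_css('h1').text.strip
      end
    end

    # Downloading then works exactly as in bin/yamd:
    #   ImageDownloader.new.download(ExamplemangaCrawler.new('http://examplemanga.com/manga/some-title/'))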