yamd 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/yamd +4 -1
- data/lib/yamd.rb +48 -14
- data/lib/yamd/gehentai.rb +44 -0
- data/lib/yamd/hentaicafe.rb +44 -0
- data/lib/yamd/mangahere.rb +10 -5
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a02e2f65df6e064441de364691ef2405e8104b2
|
4
|
+
data.tar.gz: d4e1c6e4aa6c322545e7d1e8684eb49e4dfedf96
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 322808ece20606194fff0ac606fe313d33d7ce5cbd942cb64475d0ca4f540c33dbdf03db87137594ae2960066a9873b4885dd4d7c44161cd850c7cac3eff94f2
|
7
|
+
data.tar.gz: 233cfec564e2985af9facf3c77617c221b4b823e740fd98fd00deb4faadec7a04124e49fb53528245430f11c2afe4bea8d31db8dc46f3078df6245d7871a186e
|
data/bin/yamd
CHANGED
@@ -3,11 +3,12 @@
|
|
3
3
|
require 'yamd/mangahere'
|
4
4
|
require 'yamd/mangafox'
|
5
5
|
require 'yamd/fakku'
|
6
|
+
require 'yamd/hentaicafe'
|
6
7
|
|
7
8
|
unless ARGV.size > 0
|
8
9
|
puts 'USAGE: yamd <manga main page url>'
|
9
10
|
puts 'EXAMPLE: yamd http://www.mangahere.com/manga/asagao_to_kase_san/'
|
10
|
-
puts 'Support mangahere and mangafox sites so far.'
|
11
|
+
#puts 'Support mangahere and mangafox sites so far.'
|
11
12
|
exit
|
12
13
|
end
|
13
14
|
|
@@ -18,6 +19,8 @@ elsif /mangahere/.match(manga_main_page_url)
|
|
18
19
|
manga = MangahereCrawler.new(manga_main_page_url)
|
19
20
|
elsif /fakku/.match(manga_main_page_url)
|
20
21
|
manga = FakkuCrawler.new(manga_main_page_url)
|
22
|
+
elsif /hentai\.cafe/.match(manga_main_page_url)
|
23
|
+
manga = HentaiCafeCrawler.new(manga_main_page_url)
|
21
24
|
else
|
22
25
|
puts "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
|
23
26
|
end
|
data/lib/yamd.rb
CHANGED
@@ -1,31 +1,52 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'open-uri'
|
3
3
|
require 'addressable/uri'
|
4
|
+
require 'resolv-replace'
|
4
5
|
require 'pathname'
|
5
6
|
|
6
7
|
require 'capybara'
|
7
8
|
require 'capybara/poltergeist'
|
9
|
+
#require 'phantomjs'
|
8
10
|
|
9
11
|
Capybara.register_driver(:poltergeist) do | app |
|
10
|
-
Capybara::Poltergeist::Driver.new(app, js_errors: false)
|
12
|
+
#Capybara::Poltergeist::Driver.new(app, { js_errors: false})
|
13
|
+
#Capybara::Poltergeist::Driver.new(app, { phantomjs: Phantomjs.path, js_errors: false})
|
14
|
+
Capybara::Poltergeist::Driver.new(app, {
|
15
|
+
# this blacklist was needed to unbloat mangahere downloader,
|
16
|
+
# without it the mangahere downloader often times out
|
17
|
+
url_blacklist: [
|
18
|
+
'googletagmanager.com',
|
19
|
+
'googleapis.com',
|
20
|
+
'facebook.net',
|
21
|
+
'facebook.com',
|
22
|
+
'adtrue.com',
|
23
|
+
'z6.com',
|
24
|
+
'sharethis.com',
|
25
|
+
'puserving.com'
|
26
|
+
],
|
27
|
+
js_errors: false,
|
28
|
+
phantomjs_options: ['--ignore-ssl-errors=yes', '--load-images=false']
|
29
|
+
})
|
11
30
|
end
|
12
31
|
|
13
32
|
Capybara.default_driver = :poltergeist
|
14
33
|
Capybara.run_server = false
|
15
|
-
$
|
34
|
+
$session = Capybara.current_session
|
16
35
|
|
17
36
|
def my_open(url)
|
18
|
-
|
19
|
-
|
20
|
-
|
37
|
+
puts "visiting " + url
|
38
|
+
$session.visit url
|
39
|
+
|
40
|
+
$session.html
|
21
41
|
end
|
22
42
|
|
23
43
|
class PageCrawler
|
24
|
-
attr_reader :custom_data, :url, :parsed_html, :number, :chapter
|
44
|
+
attr_reader :custom_data, :url, :uri, :parsed_html, :number, :chapter
|
25
45
|
|
26
46
|
def initialize(custom_data, parsed_html, number, chapter)
|
27
47
|
@custom_data = custom_data
|
28
48
|
@url = custom_data[:url]
|
49
|
+
@uri = Addressable::URI.heuristic_parse(url)
|
29
50
|
@parsed_html = parsed_html
|
30
51
|
@number = number
|
31
52
|
@chapter = chapter
|
@@ -34,14 +55,19 @@ class PageCrawler
|
|
34
55
|
def image_url
|
35
56
|
fail 'This method is abstract and have to be defined in a subclass.'
|
36
57
|
end
|
58
|
+
|
59
|
+
def clean_image_url
|
60
|
+
@uri.join(image_url).normalize.to_s
|
61
|
+
end
|
37
62
|
end
|
38
63
|
|
39
64
|
class ChapterCrawler
|
40
|
-
attr_reader :custom_data, :url, :parsed_html, :number, :manga
|
65
|
+
attr_reader :custom_data, :url, :uri, :parsed_html, :number, :manga
|
41
66
|
|
42
67
|
def initialize(custom_data, chapter_page, number, manga)
|
43
68
|
@custom_data = custom_data
|
44
69
|
@url = custom_data[:url]
|
70
|
+
@uri = Addressable::URI.heuristic_parse(url)
|
45
71
|
@number = number
|
46
72
|
@parsed_html = chapter_page
|
47
73
|
@manga = manga
|
@@ -59,7 +85,10 @@ class ChapterCrawler
|
|
59
85
|
Enumerator.new do | yielder |
|
60
86
|
number = 1
|
61
87
|
pages_info.each do | page_info |
|
62
|
-
|
88
|
+
# fix the url to be absolute
|
89
|
+
full_url = @uri.join(page_info[:url]).to_s
|
90
|
+
page_info[:url] = full_url
|
91
|
+
parsed_html = Nokogiri::HTML(my_open(full_url))
|
63
92
|
yielder.yield self.class.page_class.new(page_info, parsed_html, number, self)
|
64
93
|
number += 1
|
65
94
|
end
|
@@ -72,11 +101,12 @@ class ChapterCrawler
|
|
72
101
|
end
|
73
102
|
|
74
103
|
class MangaCrawler
|
75
|
-
attr_accessor :url, :parsed_html
|
104
|
+
attr_accessor :url, :uri, :parsed_html
|
76
105
|
|
77
106
|
def initialize(manga_main_page_url)
|
78
107
|
@url = manga_main_page_url
|
79
|
-
@
|
108
|
+
@uri = Addressable::URI.heuristic_parse(url)
|
109
|
+
@parsed_html = Nokogiri::HTML(my_open(manga_main_page_url))
|
80
110
|
end
|
81
111
|
|
82
112
|
def chapters_info
|
@@ -87,7 +117,10 @@ class MangaCrawler
|
|
87
117
|
Enumerator.new do | yielder |
|
88
118
|
number = 1
|
89
119
|
chapters_info.each do | chapter_info |
|
90
|
-
|
120
|
+
# fix the url to be absolute
|
121
|
+
full_url = @uri.join(chapter_info[:url]).to_s
|
122
|
+
chapter_info[:url] = full_url
|
123
|
+
page = Nokogiri::HTML(my_open(full_url))
|
91
124
|
yielder.yield self.class.chapter_class.new(chapter_info, page, number, self)
|
92
125
|
number += 1
|
93
126
|
end
|
@@ -143,9 +176,10 @@ class ImageDownloader
|
|
143
176
|
page_name = self.class.format_page_name(page, chapter, manga)
|
144
177
|
page_abs_path = chapter_dir.join(page_name).to_s
|
145
178
|
File.open(page_abs_path, 'wb') do | f |
|
146
|
-
|
147
|
-
|
148
|
-
|
179
|
+
open(page.clean_image_url) do | image |
|
180
|
+
# TODO: check if copy_stream avoids allocating the whole image in
|
181
|
+
# memory before starting to flush it
|
182
|
+
IO.copy_stream(image, f)
|
149
183
|
end
|
150
184
|
end
|
151
185
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'yamd'
|
2
|
+
|
3
|
+
class MangaherePage < PageCrawler
|
4
|
+
def image_url
|
5
|
+
@parsed_html.at_css('#viewer a img')['src']
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
class MangahereChapter < ChapterCrawler
|
10
|
+
def self.page_class
|
11
|
+
MangaherePage
|
12
|
+
end
|
13
|
+
|
14
|
+
def pages_info
|
15
|
+
# there's no need for a lazy enumerator here, no IO action is taken
|
16
|
+
page_options = @parsed_html.at_css('.prew_page + select').css('option')
|
17
|
+
page_urls = []
|
18
|
+
page_options.each do | option |
|
19
|
+
page_urls << { url: option['value'] }
|
20
|
+
end
|
21
|
+
page_urls
|
22
|
+
end
|
23
|
+
|
24
|
+
def name
|
25
|
+
@custom_data[:name]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class MangahereCrawler < MangaCrawler
|
30
|
+
def self.chapter_class
|
31
|
+
MangahereChapter
|
32
|
+
end
|
33
|
+
|
34
|
+
def chapters_info
|
35
|
+
url = URI.join(self.url, @parsed_html.at_css('a.button.green')['href'])
|
36
|
+
[{ name: 'OnlyChapter',
|
37
|
+
url: url }]
|
38
|
+
end
|
39
|
+
|
40
|
+
def name
|
41
|
+
@parsed_html.at_css('h1').text.strip
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'yamd'
|
2
|
+
|
3
|
+
class HentaiCafePage < PageCrawler
|
4
|
+
def image_url
|
5
|
+
@parsed_html.at_css('#page img')['src']
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
class HentaiCafeChapter < ChapterCrawler
|
10
|
+
def self.page_class
|
11
|
+
HentaiCafePage
|
12
|
+
end
|
13
|
+
|
14
|
+
def pages_info
|
15
|
+
# there's no need for a lazy enumerator here, no IO action is taken
|
16
|
+
page_list = @parsed_html.at_css('ul.dropdown').css('li')
|
17
|
+
page_urls = []
|
18
|
+
page_list.each do | li_el |
|
19
|
+
page_urls << { url: li_el.css('a').first['href'] }
|
20
|
+
end
|
21
|
+
page_urls
|
22
|
+
end
|
23
|
+
|
24
|
+
def name
|
25
|
+
@custom_data[:name]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class HentaiCafeCrawler < MangaCrawler
|
30
|
+
def self.chapter_class
|
31
|
+
HentaiCafeChapter
|
32
|
+
end
|
33
|
+
|
34
|
+
def chapters_info
|
35
|
+
css = 'a.x-btn.x-btn-flat.x-btn-rounded.x-btn-large'
|
36
|
+
[{ name: 'OnlyChapter',
|
37
|
+
url: @parsed_html.at_css(css)['href'] }]
|
38
|
+
end
|
39
|
+
|
40
|
+
def name
|
41
|
+
@parsed_html.at_css('h3').text.strip
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
data/lib/yamd/mangahere.rb
CHANGED
@@ -2,7 +2,7 @@ require 'yamd'
|
|
2
2
|
|
3
3
|
class MangaherePage < PageCrawler
|
4
4
|
def image_url
|
5
|
-
@parsed_html.at_css('#viewer a img')['src']
|
5
|
+
@parsed_html.at_css('#viewer a img:not(.loadingImg)')['src']
|
6
6
|
end
|
7
7
|
end
|
8
8
|
|
@@ -18,7 +18,12 @@ class MangahereChapter < ChapterCrawler
|
|
18
18
|
page_options.each do | option |
|
19
19
|
page_urls << { url: option['value'] }
|
20
20
|
end
|
21
|
-
|
21
|
+
# drop the 'featured' page at end of each chapter
|
22
|
+
if /featured/.match(page_urls.last[:url]) then
|
23
|
+
page_urls[0...-1]
|
24
|
+
else
|
25
|
+
page_urls
|
26
|
+
end
|
22
27
|
end
|
23
28
|
|
24
29
|
def name
|
@@ -33,9 +38,9 @@ class MangahereCrawler < MangaCrawler
|
|
33
38
|
|
34
39
|
def chapters_info
|
35
40
|
@parsed_html.css('.detail_list ul li a').reverse.map do | chapter_link |
|
36
|
-
|
37
|
-
|
38
|
-
|
41
|
+
{ name: chapter_link.text.strip,
|
42
|
+
url: chapter_link['href']
|
43
|
+
}
|
39
44
|
end
|
40
45
|
end
|
41
46
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yamd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Henrique Becker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '1
|
75
|
+
version: '2.1'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '1
|
82
|
+
version: '2.1'
|
83
83
|
description: 'This gem offers: classes to subclass and create a manga site crawler;
|
84
84
|
a dowloader to use with these classes; some site-specific scripts.'
|
85
85
|
email: henriquebecker91@gmail.com
|
@@ -91,11 +91,13 @@ files:
|
|
91
91
|
- bin/yamd
|
92
92
|
- lib/yamd.rb
|
93
93
|
- lib/yamd/fakku.rb
|
94
|
+
- lib/yamd/gehentai.rb
|
95
|
+
- lib/yamd/hentaicafe.rb
|
94
96
|
- lib/yamd/mangafox.rb
|
95
97
|
- lib/yamd/mangahere.rb
|
96
98
|
homepage: http://rubygems.org/gems/yamd
|
97
99
|
licenses:
|
98
|
-
-
|
100
|
+
- Unlicense
|
99
101
|
metadata: {}
|
100
102
|
post_install_message:
|
101
103
|
rdoc_options: []
|
@@ -113,7 +115,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
115
|
version: '0'
|
114
116
|
requirements: []
|
115
117
|
rubyforge_project:
|
116
|
-
rubygems_version: 2.
|
118
|
+
rubygems_version: 2.6.13
|
117
119
|
signing_key:
|
118
120
|
specification_version: 4
|
119
121
|
summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writting manga
|