yamd 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/yamd +4 -1
- data/lib/yamd.rb +48 -14
- data/lib/yamd/gehentai.rb +44 -0
- data/lib/yamd/hentaicafe.rb +44 -0
- data/lib/yamd/mangahere.rb +10 -5
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a02e2f65df6e064441de364691ef2405e8104b2
|
4
|
+
data.tar.gz: d4e1c6e4aa6c322545e7d1e8684eb49e4dfedf96
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 322808ece20606194fff0ac606fe313d33d7ce5cbd942cb64475d0ca4f540c33dbdf03db87137594ae2960066a9873b4885dd4d7c44161cd850c7cac3eff94f2
|
7
|
+
data.tar.gz: 233cfec564e2985af9facf3c77617c221b4b823e740fd98fd00deb4faadec7a04124e49fb53528245430f11c2afe4bea8d31db8dc46f3078df6245d7871a186e
|
data/bin/yamd
CHANGED
@@ -3,11 +3,12 @@
|
|
3
3
|
require 'yamd/mangahere'
|
4
4
|
require 'yamd/mangafox'
|
5
5
|
require 'yamd/fakku'
|
6
|
+
require 'yamd/hentaicafe'
|
6
7
|
|
7
8
|
unless ARGV.size > 0
|
8
9
|
puts 'USAGE: yamd <manga main page url>'
|
9
10
|
puts 'EXAMPLE: yamd http://www.mangahere.com/manga/asagao_to_kase_san/'
|
10
|
-
puts 'Support mangahere and mangafox sites so far.'
|
11
|
+
#puts 'Support mangahere and mangafox sites so far.'
|
11
12
|
exit
|
12
13
|
end
|
13
14
|
|
@@ -18,6 +19,8 @@ elsif /mangahere/.match(manga_main_page_url)
|
|
18
19
|
manga = MangahereCrawler.new(manga_main_page_url)
|
19
20
|
elsif /fakku/.match(manga_main_page_url)
|
20
21
|
manga = FakkuCrawler.new(manga_main_page_url)
|
22
|
+
elsif /hentai\.cafe/.match(manga_main_page_url)
|
23
|
+
manga = HentaiCafeCrawler.new(manga_main_page_url)
|
21
24
|
else
|
22
25
|
puts "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
|
23
26
|
end
|
data/lib/yamd.rb
CHANGED
@@ -1,31 +1,52 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'open-uri'
|
3
3
|
require 'addressable/uri'
|
4
|
+
require 'resolv-replace'
|
4
5
|
require 'pathname'
|
5
6
|
|
6
7
|
require 'capybara'
|
7
8
|
require 'capybara/poltergeist'
|
9
|
+
#require 'phantomjs'
|
8
10
|
|
9
11
|
Capybara.register_driver(:poltergeist) do | app |
|
10
|
-
Capybara::Poltergeist::Driver.new(app, js_errors: false)
|
12
|
+
#Capybara::Poltergeist::Driver.new(app, { js_errors: false})
|
13
|
+
#Capybara::Poltergeist::Driver.new(app, { phantomjs: Phantomjs.path, js_errors: false})
|
14
|
+
Capybara::Poltergeist::Driver.new(app, {
|
15
|
+
# this blacklist was needed to unbloat mangahere downloader,
|
16
|
+
# without it the mangahere downloader often times out
|
17
|
+
url_blacklist: [
|
18
|
+
'googletagmanager.com',
|
19
|
+
'googleapis.com',
|
20
|
+
'facebook.net',
|
21
|
+
'facebook.com',
|
22
|
+
'adtrue.com',
|
23
|
+
'z6.com',
|
24
|
+
'sharethis.com',
|
25
|
+
'puserving.com'
|
26
|
+
],
|
27
|
+
js_errors: false,
|
28
|
+
phantomjs_options: ['--ignore-ssl-errors=yes', '--load-images=false']
|
29
|
+
})
|
11
30
|
end
|
12
31
|
|
13
32
|
Capybara.default_driver = :poltergeist
|
14
33
|
Capybara.run_server = false
|
15
|
-
$
|
34
|
+
$session = Capybara.current_session
|
16
35
|
|
17
36
|
def my_open(url)
|
18
|
-
|
19
|
-
|
20
|
-
|
37
|
+
puts "visiting " + url
|
38
|
+
$session.visit url
|
39
|
+
|
40
|
+
$session.html
|
21
41
|
end
|
22
42
|
|
23
43
|
class PageCrawler
|
24
|
-
attr_reader :custom_data, :url, :parsed_html, :number, :chapter
|
44
|
+
attr_reader :custom_data, :url, :uri, :parsed_html, :number, :chapter
|
25
45
|
|
26
46
|
def initialize(custom_data, parsed_html, number, chapter)
|
27
47
|
@custom_data = custom_data
|
28
48
|
@url = custom_data[:url]
|
49
|
+
@uri = Addressable::URI.heuristic_parse(url)
|
29
50
|
@parsed_html = parsed_html
|
30
51
|
@number = number
|
31
52
|
@chapter = chapter
|
@@ -34,14 +55,19 @@ class PageCrawler
|
|
34
55
|
def image_url
|
35
56
|
fail 'This method is abstract and have to be defined in a subclass.'
|
36
57
|
end
|
58
|
+
|
59
|
+
def clean_image_url
|
60
|
+
@uri.join(image_url).normalize.to_s
|
61
|
+
end
|
37
62
|
end
|
38
63
|
|
39
64
|
class ChapterCrawler
|
40
|
-
attr_reader :custom_data, :url, :parsed_html, :number, :manga
|
65
|
+
attr_reader :custom_data, :url, :uri, :parsed_html, :number, :manga
|
41
66
|
|
42
67
|
def initialize(custom_data, chapter_page, number, manga)
|
43
68
|
@custom_data = custom_data
|
44
69
|
@url = custom_data[:url]
|
70
|
+
@uri = Addressable::URI.heuristic_parse(url)
|
45
71
|
@number = number
|
46
72
|
@parsed_html = chapter_page
|
47
73
|
@manga = manga
|
@@ -59,7 +85,10 @@ class ChapterCrawler
|
|
59
85
|
Enumerator.new do | yielder |
|
60
86
|
number = 1
|
61
87
|
pages_info.each do | page_info |
|
62
|
-
|
88
|
+
# fix the url to be absolute
|
89
|
+
full_url = @uri.join(page_info[:url]).to_s
|
90
|
+
page_info[:url] = full_url
|
91
|
+
parsed_html = Nokogiri::HTML(my_open(full_url))
|
63
92
|
yielder.yield self.class.page_class.new(page_info, parsed_html, number, self)
|
64
93
|
number += 1
|
65
94
|
end
|
@@ -72,11 +101,12 @@ class ChapterCrawler
|
|
72
101
|
end
|
73
102
|
|
74
103
|
class MangaCrawler
|
75
|
-
attr_accessor :url, :parsed_html
|
104
|
+
attr_accessor :url, :uri, :parsed_html
|
76
105
|
|
77
106
|
def initialize(manga_main_page_url)
|
78
107
|
@url = manga_main_page_url
|
79
|
-
@
|
108
|
+
@uri = Addressable::URI.heuristic_parse(url)
|
109
|
+
@parsed_html = Nokogiri::HTML(my_open(manga_main_page_url))
|
80
110
|
end
|
81
111
|
|
82
112
|
def chapters_info
|
@@ -87,7 +117,10 @@ class MangaCrawler
|
|
87
117
|
Enumerator.new do | yielder |
|
88
118
|
number = 1
|
89
119
|
chapters_info.each do | chapter_info |
|
90
|
-
|
120
|
+
# fix the url to be absolute
|
121
|
+
full_url = @uri.join(chapter_info[:url]).to_s
|
122
|
+
chapter_info[:url] = full_url
|
123
|
+
page = Nokogiri::HTML(my_open(full_url))
|
91
124
|
yielder.yield self.class.chapter_class.new(chapter_info, page, number, self)
|
92
125
|
number += 1
|
93
126
|
end
|
@@ -143,9 +176,10 @@ class ImageDownloader
|
|
143
176
|
page_name = self.class.format_page_name(page, chapter, manga)
|
144
177
|
page_abs_path = chapter_dir.join(page_name).to_s
|
145
178
|
File.open(page_abs_path, 'wb') do | f |
|
146
|
-
|
147
|
-
|
148
|
-
|
179
|
+
open(page.clean_image_url) do | image |
|
180
|
+
# TODO: check if copy_stream avoids allocating the whole image in
|
181
|
+
# memory before starting to flush it
|
182
|
+
IO.copy_stream(image, f)
|
149
183
|
end
|
150
184
|
end
|
151
185
|
end
|
data/lib/yamd/gehentai.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'yamd'
|
2
|
+
|
3
|
+
class MangaherePage < PageCrawler
|
4
|
+
def image_url
|
5
|
+
@parsed_html.at_css('#viewer a img')['src']
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
class MangahereChapter < ChapterCrawler
|
10
|
+
def self.page_class
|
11
|
+
MangaherePage
|
12
|
+
end
|
13
|
+
|
14
|
+
def pages_info
|
15
|
+
# there's no need for a lazy enumerator here; no IO action is taken
|
16
|
+
page_options = @parsed_html.at_css('.prew_page + select').css('option')
|
17
|
+
page_urls = []
|
18
|
+
page_options.each do | option |
|
19
|
+
page_urls << { url: option['value'] }
|
20
|
+
end
|
21
|
+
page_urls
|
22
|
+
end
|
23
|
+
|
24
|
+
def name
|
25
|
+
@custom_data[:name]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class MangahereCrawler < MangaCrawler
|
30
|
+
def self.chapter_class
|
31
|
+
MangahereChapter
|
32
|
+
end
|
33
|
+
|
34
|
+
def chapters_info
|
35
|
+
url = URI.join(self.url, @parsed_html.at_css('a.button.green')['href'])
|
36
|
+
[{ name: 'OnlyChapter',
|
37
|
+
url: url }]
|
38
|
+
end
|
39
|
+
|
40
|
+
def name
|
41
|
+
@parsed_html.at_css('h1').text.strip
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
data/lib/yamd/hentaicafe.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'yamd'
|
2
|
+
|
3
|
+
class HentaiCafePage < PageCrawler
|
4
|
+
def image_url
|
5
|
+
@parsed_html.at_css('#page img')['src']
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
class HentaiCafeChapter < ChapterCrawler
|
10
|
+
def self.page_class
|
11
|
+
HentaiCafePage
|
12
|
+
end
|
13
|
+
|
14
|
+
def pages_info
|
15
|
+
# there's no need for a lazy enumerator here; no IO action is taken
|
16
|
+
page_list = @parsed_html.at_css('ul.dropdown').css('li')
|
17
|
+
page_urls = []
|
18
|
+
page_list.each do | li_el |
|
19
|
+
page_urls << { url: li_el.css('a').first['href'] }
|
20
|
+
end
|
21
|
+
page_urls
|
22
|
+
end
|
23
|
+
|
24
|
+
def name
|
25
|
+
@custom_data[:name]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class HentaiCafeCrawler < MangaCrawler
|
30
|
+
def self.chapter_class
|
31
|
+
HentaiCafeChapter
|
32
|
+
end
|
33
|
+
|
34
|
+
def chapters_info
|
35
|
+
css = 'a.x-btn.x-btn-flat.x-btn-rounded.x-btn-large'
|
36
|
+
[{ name: 'OnlyChapter',
|
37
|
+
url: @parsed_html.at_css(css)['href'] }]
|
38
|
+
end
|
39
|
+
|
40
|
+
def name
|
41
|
+
@parsed_html.at_css('h3').text.strip
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
data/lib/yamd/mangahere.rb
CHANGED
@@ -2,7 +2,7 @@ require 'yamd'
|
|
2
2
|
|
3
3
|
class MangaherePage < PageCrawler
|
4
4
|
def image_url
|
5
|
-
@parsed_html.at_css('#viewer a img')['src']
|
5
|
+
@parsed_html.at_css('#viewer a img:not(.loadingImg)')['src']
|
6
6
|
end
|
7
7
|
end
|
8
8
|
|
@@ -18,7 +18,12 @@ class MangahereChapter < ChapterCrawler
|
|
18
18
|
page_options.each do | option |
|
19
19
|
page_urls << { url: option['value'] }
|
20
20
|
end
|
21
|
-
|
21
|
+
# drop the 'featured' page at end of each chapter
|
22
|
+
if /featured/.match(page_urls.last[:url]) then
|
23
|
+
page_urls[0...-1]
|
24
|
+
else
|
25
|
+
page_urls
|
26
|
+
end
|
22
27
|
end
|
23
28
|
|
24
29
|
def name
|
@@ -33,9 +38,9 @@ class MangahereCrawler < MangaCrawler
|
|
33
38
|
|
34
39
|
def chapters_info
|
35
40
|
@parsed_html.css('.detail_list ul li a').reverse.map do | chapter_link |
|
36
|
-
|
37
|
-
|
38
|
-
|
41
|
+
{ name: chapter_link.text.strip,
|
42
|
+
url: chapter_link['href']
|
43
|
+
}
|
39
44
|
end
|
40
45
|
end
|
41
46
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yamd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Henrique Becker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '1
|
75
|
+
version: '2.1'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '1
|
82
|
+
version: '2.1'
|
83
83
|
description: 'This gem offers: classes to subclass and create a manga site crawler;
|
84
84
|
a dowloader to use with these classes; some site-specific scripts.'
|
85
85
|
email: henriquebecker91@gmail.com
|
@@ -91,11 +91,13 @@ files:
|
|
91
91
|
- bin/yamd
|
92
92
|
- lib/yamd.rb
|
93
93
|
- lib/yamd/fakku.rb
|
94
|
+
- lib/yamd/gehentai.rb
|
95
|
+
- lib/yamd/hentaicafe.rb
|
94
96
|
- lib/yamd/mangafox.rb
|
95
97
|
- lib/yamd/mangahere.rb
|
96
98
|
homepage: http://rubygems.org/gems/yamd
|
97
99
|
licenses:
|
98
|
-
-
|
100
|
+
- Unlicense
|
99
101
|
metadata: {}
|
100
102
|
post_install_message:
|
101
103
|
rdoc_options: []
|
@@ -113,7 +115,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
115
|
version: '0'
|
114
116
|
requirements: []
|
115
117
|
rubyforge_project:
|
116
|
-
rubygems_version: 2.
|
118
|
+
rubygems_version: 2.6.13
|
117
119
|
signing_key:
|
118
120
|
specification_version: 4
|
119
121
|
summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writting manga
|