yamd 0.0.3 → 0.0.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a7cfa4dadcceadf400f49b0d0fc2e353b9a474a7
-  data.tar.gz: ae7c9eb249ca40a34515832316ca20069addd234
+  metadata.gz: 8a02e2f65df6e064441de364691ef2405e8104b2
+  data.tar.gz: d4e1c6e4aa6c322545e7d1e8684eb49e4dfedf96
 SHA512:
-  metadata.gz: 13e0fd6911898fe1eed2b82ef27e55bdda1cbc4df5020694bdbf5236145a5490a1d518e0bb4a1716977644bfcb8e3ce4774bcc6257a76459a8d08a96b1b26a3b
-  data.tar.gz: b9b3c932f1313b3d6909c88cb292a3be96f3d22b3246c8dc190853152b9897845ecc4621e12d46d2180f1af709c854d4743c17c1a60dfd33b093a7bc38f09725
+  metadata.gz: 322808ece20606194fff0ac606fe313d33d7ce5cbd942cb64475d0ca4f540c33dbdf03db87137594ae2960066a9873b4885dd4d7c44161cd850c7cac3eff94f2
+  data.tar.gz: 233cfec564e2985af9facf3c77617c221b4b823e740fd98fd00deb4faadec7a04124e49fb53528245430f11c2afe4bea8d31db8dc46f3078df6245d7871a186e
data/bin/yamd CHANGED
@@ -3,11 +3,12 @@
 require 'yamd/mangahere'
 require 'yamd/mangafox'
 require 'yamd/fakku'
+require 'yamd/hentaicafe'
 
 unless ARGV.size > 0
   puts 'USAGE: yamd <manga main page url>'
   puts 'EXAMPLE: yamd http://www.mangahere.com/manga/asagao_to_kase_san/'
-  puts 'Support mangahere and mangafox sites so far.'
+  #puts 'Support mangahere and mangafox sites so far.'
   exit
 end
 
@@ -18,6 +19,8 @@ elsif /mangahere/.match(manga_main_page_url)
   manga = MangahereCrawler.new(manga_main_page_url)
 elsif /fakku/.match(manga_main_page_url)
   manga = FakkuCrawler.new(manga_main_page_url)
+elsif /hentai\.cafe/.match(manga_main_page_url)
+  manga = HentaiCafeCrawler.new(manga_main_page_url)
 else
   puts "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
 end
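
With the new require and the /hentai\.cafe/ branch, the executable now accepts hentai.cafe URLs as well. A minimal sketch of how the chosen crawler is consumed afterwards; the `chapters` and `pages` enumerator names are inferred from the enumerator bodies in the lib/yamd.rb diff below, and the URL is made up, so treat both as assumptions rather than the script's literal tail:

# Hypothetical usage sketch (not part of the gem):
manga = HentaiCafeCrawler.new('https://hentai.cafe/some-title/') # made-up URL
manga.chapters.each do |chapter|   # enumerator name inferred, unconfirmed
  chapter.pages.each do |page|     # enumerator name inferred, unconfirmed
    puts page.clean_image_url      # helper added in lib/yamd.rb below
  end
end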
data/lib/yamd.rb CHANGED
@@ -1,31 +1,52 @@
 require 'nokogiri'
 require 'open-uri'
 require 'addressable/uri'
+require 'resolv-replace'
 require 'pathname'
 
 require 'capybara'
 require 'capybara/poltergeist'
+#require 'phantomjs'
 
 Capybara.register_driver(:poltergeist) do | app |
-  Capybara::Poltergeist::Driver.new(app, js_errors: false)
+  #Capybara::Poltergeist::Driver.new(app, { js_errors: false})
+  #Capybara::Poltergeist::Driver.new(app, { phantomjs: Phantomjs.path, js_errors: false})
+  Capybara::Poltergeist::Driver.new(app, {
+    # this blacklist was needed to unbloat mangahere downloader,
+    # without it the mangahere downloader often timeout
+    url_blacklist: [
+      'googletagmanager.com',
+      'googleapis.com',
+      'facebook.net',
+      'facebook.com',
+      'adtrue.com',
+      'z6.com',
+      'sharethis.com',
+      'puserving.com'
+    ],
+    js_errors: false,
+    phantomjs_options: ['--ignore-ssl-errors=yes', '--load-images=false']
+  })
 end
 
 Capybara.default_driver = :poltergeist
 Capybara.run_server = false
-$internet = Capybara.current_session
+$session = Capybara.current_session
 
 def my_open(url)
-  $internet.visit url
-
-  $internet.html
+  puts "visiting " + url
+  $session.visit url
+
+  $session.html
 end
 
 class PageCrawler
-  attr_reader :custom_data, :url, :parsed_html, :number, :chapter
+  attr_reader :custom_data, :url, :uri, :parsed_html, :number, :chapter
 
   def initialize(custom_data, parsed_html, number, chapter)
     @custom_data = custom_data
     @url = custom_data[:url]
+    @uri = Addressable::URI.heuristic_parse(url)
     @parsed_html = parsed_html
     @number = number
     @chapter = chapter
@@ -34,14 +55,19 @@ class PageCrawler
   def image_url
     fail 'This method is abstract and have to be defined in a subclass.'
   end
+
+  def clean_image_url
+    @uri.join(image_url).normalize.to_s
+  end
 end
 
 class ChapterCrawler
-  attr_reader :custom_data, :url, :parsed_html, :number, :manga
+  attr_reader :custom_data, :url, :uri, :parsed_html, :number, :manga
 
   def initialize(custom_data, chapter_page, number, manga)
     @custom_data = custom_data
     @url = custom_data[:url]
+    @uri = Addressable::URI.heuristic_parse(url)
     @number = number
     @parsed_html = chapter_page
     @manga = manga
@@ -59,7 +85,10 @@ class ChapterCrawler
     Enumerator.new do | yielder |
       number = 1
       pages_info.each do | page_info |
-        parsed_html = Nokogiri::HTML(my_open(page_info[:url]))
+        # fix the url to be absolute
+        full_url = @uri.join(page_info[:url]).to_s
+        page_info[:url] = full_url
+        parsed_html = Nokogiri::HTML(my_open(full_url))
         yielder.yield self.class.page_class.new(page_info, parsed_html, number, self)
         number += 1
       end
@@ -72,11 +101,12 @@ class ChapterCrawler
 end
 
 class MangaCrawler
-  attr_accessor :url, :parsed_html
+  attr_accessor :url, :uri, :parsed_html
 
   def initialize(manga_main_page_url)
     @url = manga_main_page_url
-    @parsed_html = Nokogiri::HTML(open(manga_main_page_url))
+    @uri = Addressable::URI.heuristic_parse(url)
+    @parsed_html = Nokogiri::HTML(my_open(manga_main_page_url))
   end
 
   def chapters_info
@@ -87,7 +117,10 @@ class MangaCrawler
     Enumerator.new do | yielder |
       number = 1
      chapters_info.each do | chapter_info |
-        page = Nokogiri::HTML(my_open(chapter_info[:url]))
+        # fix the url to be absolute
+        full_url = @uri.join(chapter_info[:url]).to_s
+        chapter_info[:url] = full_url
+        page = Nokogiri::HTML(my_open(full_url))
         yielder.yield self.class.chapter_class.new(chapter_info, page, number, self)
         number += 1
       end
@@ -143,9 +176,10 @@ class ImageDownloader
       page_name = self.class.format_page_name(page, chapter, manga)
       page_abs_path = chapter_dir.join(page_name).to_s
       File.open(page_abs_path, 'wb') do | f |
-        safe_uri = URI.encode(page.image_url, '[]')
-        open(safe_uri, 'rb') do | image |
-          f.write(image.read)
+        open(page.clean_image_url) do | image |
+          # TODO: check if copy_stream avoids alloacting the whole image in
+          # memory before starting to flush it
+          IO.copy_stream(image, f)
         end
       end
     end
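
Two of the lib/yamd.rb changes deserve a note. First, image URLs are now resolved through Addressable (heuristic_parse tolerates scheme-less URLs, join makes relative paths absolute, normalize percent-encodes stray characters), replacing the old URI.encode call. Second, IO.copy_stream answers the TODO above: it copies in bounded chunks, so the image is streamed to disk instead of being read into one large string first. A short illustration using the real addressable and open-uri APIs but a made-up URL and file name:

require 'addressable/uri'
require 'open-uri'

# What clean_image_url does: resolve a relative, unescaped path against
# the page URI and normalize it into a fetchable absolute URL.
base = Addressable::URI.heuristic_parse('www.example.com/manga/title/c001/')
base.join('../images/01 a.jpg').normalize.to_s
# => "http://www.example.com/manga/title/images/01%20a.jpg"

# What the downloader does now: stream the response body to disk in
# fixed-size chunks rather than buffering it all via image.read.
File.open('01.jpg', 'wb') do |f|
  open('http://www.example.com/manga/title/images/01%20a.jpg', 'rb') do |image|
    IO.copy_stream(image, f)
  end
end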
data/lib/yamd/gehentai.rb ADDED
@@ -0,0 +1,44 @@
+require 'yamd'
+
+class MangaherePage < PageCrawler
+  def image_url
+    @parsed_html.at_css('#viewer a img')['src']
+  end
+end
+
+class MangahereChapter < ChapterCrawler
+  def self.page_class
+    MangaherePage
+  end
+
+  def pages_info
+    # there's no need of an lazy enumerator here, no IO action is taken
+    page_options = @parsed_html.at_css('.prew_page + select').css('option')
+    page_urls = []
+    page_options.each do | option |
+      page_urls << { url: option['value'] }
+    end
+    page_urls
+  end
+
+  def name
+    @custom_data[:name]
+  end
+end
+
+class MangahereCrawler < MangaCrawler
+  def self.chapter_class
+    MangahereChapter
+  end
+
+  def chapters_info
+    url = URI.join(self.url, @parsed_html.at_css('a.button.green')['href'])
+    [{ name: 'OnlyChapter',
+       url: url }]
+  end
+
+  def name
+    @parsed_html.at_css('h1').text.strip
+  end
+end
+
data/lib/yamd/hentaicafe.rb ADDED
@@ -0,0 +1,44 @@
+require 'yamd'
+
+class HentaiCafePage < PageCrawler
+  def image_url
+    @parsed_html.at_css('#page img')['src']
+  end
+end
+
+class HentaiCafeChapter < ChapterCrawler
+  def self.page_class
+    HentaiCafePage
+  end
+
+  def pages_info
+    # there's no need of an lazy enumerator here, no IO action is taken
+    page_list = @parsed_html.at_css('ul.dropdown').css('li')
+    page_urls = []
+    page_list.each do | li_el |
+      page_urls << { url: li_el.css('a').first['href'] }
+    end
+    page_urls
+  end
+
+  def name
+    @custom_data[:name]
+  end
+end
+
+class HentaiCafeCrawler < MangaCrawler
+  def self.chapter_class
+    HentaiCafeChapter
+  end
+
+  def chapters_info
+    css = 'a.x-btn.x-btn-flat.x-btn-rounded.x-btn-large'
+    [{ name: 'OnlyChapter',
+       url: @parsed_html.at_css(css)['href'] }]
+  end
+
+  def name
+    @parsed_html.at_css('h3').text.strip
+  end
+end
+
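
Both new adapters follow the same pattern: subclass the three base crawlers and fill in a handful of hooks. A skeleton of the minimal interface a new site needs, with hypothetical SomeSite* names and selectors (only the hook names come from the code above):

require 'yamd'

class SomeSitePage < PageCrawler
  def image_url
    @parsed_html.at_css('img#main')['src'] # site-specific selector, made up
  end
end

class SomeSiteChapter < ChapterCrawler
  def self.page_class
    SomeSitePage
  end

  def pages_info
    # an array of { url: ... } hashes; relative URLs are made absolute
    # by the base class (the @uri.join calls in lib/yamd.rb)
    [{ url: '/reader/1' }]
  end

  def name
    @custom_data[:name]
  end
end

class SomeSiteCrawler < MangaCrawler
  def self.chapter_class
    SomeSiteChapter
  end

  def chapters_info
    # single-gallery sites return one entry, as both new adapters do
    [{ name: 'OnlyChapter', url: '/reader/1' }]
  end

  def name
    @parsed_html.at_css('h1').text.strip
  end
end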
data/lib/yamd/mangahere.rb CHANGED
@@ -2,7 +2,7 @@ require 'yamd'
 
 class MangaherePage < PageCrawler
   def image_url
-    @parsed_html.at_css('#viewer a img')['src']
+    @parsed_html.at_css('#viewer a img:not(.loadingImg)')['src']
   end
 end
 
@@ -18,7 +18,12 @@ class MangahereChapter < ChapterCrawler
     page_options.each do | option |
       page_urls << { url: option['value'] }
     end
-    page_urls
+    # drop the 'featured' page at end of each chapter
+    if /featured/.match(page_urls.last[:url]) then
+      page_urls[0...-1]
+    else
+      page_urls
+    end
   end
 
   def name
@@ -33,9 +38,9 @@ class MangahereCrawler < MangaCrawler
 
   def chapters_info
     @parsed_html.css('.detail_list ul li a').reverse.map do | chapter_link |
-      { name: chapter_link.text.strip,
-        url: chapter_link['href']
-      }
+      { name: chapter_link.text.strip,
+        url: chapter_link['href']
+      }
     end
   end
 
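The selector change in MangaherePage guards against a spinner image that apparently now precedes the real page image in the reader markup. A quick Nokogiri check of the difference, using made-up markup standing in for the mangahere page:

require 'nokogiri'

html = Nokogiri::HTML(
  '<div id="viewer"><a>' \
  '<img class="loadingImg" src="spinner.gif">' \
  '<img src="http://example.com/page01.jpg">' \
  '</a></div>'
)

html.at_css('#viewer a img')['src']
# => "spinner.gif" (the old selector grabs the placeholder)
html.at_css('#viewer a img:not(.loadingImg)')['src']
# => "http://example.com/page01.jpg"
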
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: yamd
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - Henrique Becker
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-12-25 00:00:00.000000000 Z
+date: 2018-02-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -72,14 +72,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.9'
+        version: '2.1'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.9'
+        version: '2.1'
 description: 'This gem offers: classes to subclass and create a manga site crawler;
   a dowloader to use with these classes; some site-specific scripts.'
 email: henriquebecker91@gmail.com
@@ -91,11 +91,13 @@ files:
 - bin/yamd
 - lib/yamd.rb
 - lib/yamd/fakku.rb
+- lib/yamd/gehentai.rb
+- lib/yamd/hentaicafe.rb
 - lib/yamd/mangafox.rb
 - lib/yamd/mangahere.rb
 homepage: http://rubygems.org/gems/yamd
 licenses:
-- Public domain
+- Unlicense
 metadata: {}
 post_install_message:
 rdoc_options: []
@@ -113,7 +115,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.5.1
+rubygems_version: 2.6.13
 signing_key:
 specification_version: 4
 summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writting manga