yamd 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a7cfa4dadcceadf400f49b0d0fc2e353b9a474a7
4
- data.tar.gz: ae7c9eb249ca40a34515832316ca20069addd234
3
+ metadata.gz: 8a02e2f65df6e064441de364691ef2405e8104b2
4
+ data.tar.gz: d4e1c6e4aa6c322545e7d1e8684eb49e4dfedf96
5
5
  SHA512:
6
- metadata.gz: 13e0fd6911898fe1eed2b82ef27e55bdda1cbc4df5020694bdbf5236145a5490a1d518e0bb4a1716977644bfcb8e3ce4774bcc6257a76459a8d08a96b1b26a3b
7
- data.tar.gz: b9b3c932f1313b3d6909c88cb292a3be96f3d22b3246c8dc190853152b9897845ecc4621e12d46d2180f1af709c854d4743c17c1a60dfd33b093a7bc38f09725
6
+ metadata.gz: 322808ece20606194fff0ac606fe313d33d7ce5cbd942cb64475d0ca4f540c33dbdf03db87137594ae2960066a9873b4885dd4d7c44161cd850c7cac3eff94f2
7
+ data.tar.gz: 233cfec564e2985af9facf3c77617c221b4b823e740fd98fd00deb4faadec7a04124e49fb53528245430f11c2afe4bea8d31db8dc46f3078df6245d7871a186e
data/bin/yamd CHANGED
@@ -3,11 +3,12 @@
3
3
  require 'yamd/mangahere'
4
4
  require 'yamd/mangafox'
5
5
  require 'yamd/fakku'
6
+ require 'yamd/hentaicafe'
6
7
 
7
8
  unless ARGV.size > 0
8
9
  puts 'USAGE: yamd <manga main page url>'
9
10
  puts 'EXAMPLE: yamd http://www.mangahere.com/manga/asagao_to_kase_san/'
10
- puts 'Support mangahere and mangafox sites so far.'
11
+ #puts 'Support mangahere and mangafox sites so far.'
11
12
  exit
12
13
  end
13
14
 
@@ -18,6 +19,8 @@ elsif /mangahere/.match(manga_main_page_url)
18
19
  manga = MangahereCrawler.new(manga_main_page_url)
19
20
  elsif /fakku/.match(manga_main_page_url)
20
21
  manga = FakkuCrawler.new(manga_main_page_url)
22
+ elsif /hentai\.cafe/.match(manga_main_page_url)
23
+ manga = HentaiCafeCrawler.new(manga_main_page_url)
21
24
  else
22
25
  puts "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
23
26
  end
@@ -1,31 +1,52 @@
1
1
  require 'nokogiri'
2
2
  require 'open-uri'
3
3
  require 'addressable/uri'
4
+ require 'resolv-replace'
4
5
  require 'pathname'
5
6
 
6
7
  require 'capybara'
7
8
  require 'capybara/poltergeist'
9
+ #require 'phantomjs'
8
10
 
9
11
  Capybara.register_driver(:poltergeist) do | app |
10
- Capybara::Poltergeist::Driver.new(app, js_errors: false)
12
+ #Capybara::Poltergeist::Driver.new(app, { js_errors: false})
13
+ #Capybara::Poltergeist::Driver.new(app, { phantomjs: Phantomjs.path, js_errors: false})
14
+ Capybara::Poltergeist::Driver.new(app, {
15
+ # this blacklist was needed to unbloat mangahere downloader,
16
+ # without it the mangahere downloader often times out
17
+ url_blacklist: [
18
+ 'googletagmanager.com',
19
+ 'googleapis.com',
20
+ 'facebook.net',
21
+ 'facebook.com',
22
+ 'adtrue.com',
23
+ 'z6.com',
24
+ 'sharethis.com',
25
+ 'puserving.com'
26
+ ],
27
+ js_errors: false,
28
+ phantomjs_options: ['--ignore-ssl-errors=yes', '--load-images=false']
29
+ })
11
30
  end
12
31
 
13
32
  Capybara.default_driver = :poltergeist
14
33
  Capybara.run_server = false
15
- $internet = Capybara.current_session
34
+ $session = Capybara.current_session
16
35
 
17
36
  def my_open(url)
18
- $internet.visit url
19
-
20
- $internet.html
37
+ puts "visiting " + url
38
+ $session.visit url
39
+
40
+ $session.html
21
41
  end
22
42
 
23
43
  class PageCrawler
24
- attr_reader :custom_data, :url, :parsed_html, :number, :chapter
44
+ attr_reader :custom_data, :url, :uri, :parsed_html, :number, :chapter
25
45
 
26
46
  def initialize(custom_data, parsed_html, number, chapter)
27
47
  @custom_data = custom_data
28
48
  @url = custom_data[:url]
49
+ @uri = Addressable::URI.heuristic_parse(url)
29
50
  @parsed_html = parsed_html
30
51
  @number = number
31
52
  @chapter = chapter
@@ -34,14 +55,19 @@ class PageCrawler
34
55
  def image_url
35
56
  fail 'This method is abstract and have to be defined in a subclass.'
36
57
  end
58
+
59
+ def clean_image_url
60
+ @uri.join(image_url).normalize.to_s
61
+ end
37
62
  end
38
63
 
39
64
  class ChapterCrawler
40
- attr_reader :custom_data, :url, :parsed_html, :number, :manga
65
+ attr_reader :custom_data, :url, :uri, :parsed_html, :number, :manga
41
66
 
42
67
  def initialize(custom_data, chapter_page, number, manga)
43
68
  @custom_data = custom_data
44
69
  @url = custom_data[:url]
70
+ @uri = Addressable::URI.heuristic_parse(url)
45
71
  @number = number
46
72
  @parsed_html = chapter_page
47
73
  @manga = manga
@@ -59,7 +85,10 @@ class ChapterCrawler
59
85
  Enumerator.new do | yielder |
60
86
  number = 1
61
87
  pages_info.each do | page_info |
62
- parsed_html = Nokogiri::HTML(my_open(page_info[:url]))
88
+ # fix the url to be absolute
89
+ full_url = @uri.join(page_info[:url]).to_s
90
+ page_info[:url] = full_url
91
+ parsed_html = Nokogiri::HTML(my_open(full_url))
63
92
  yielder.yield self.class.page_class.new(page_info, parsed_html, number, self)
64
93
  number += 1
65
94
  end
@@ -72,11 +101,12 @@ class ChapterCrawler
72
101
  end
73
102
 
74
103
  class MangaCrawler
75
- attr_accessor :url, :parsed_html
104
+ attr_accessor :url, :uri, :parsed_html
76
105
 
77
106
  def initialize(manga_main_page_url)
78
107
  @url = manga_main_page_url
79
- @parsed_html = Nokogiri::HTML(open(manga_main_page_url))
108
+ @uri = Addressable::URI.heuristic_parse(url)
109
+ @parsed_html = Nokogiri::HTML(my_open(manga_main_page_url))
80
110
  end
81
111
 
82
112
  def chapters_info
@@ -87,7 +117,10 @@ class MangaCrawler
87
117
  Enumerator.new do | yielder |
88
118
  number = 1
89
119
  chapters_info.each do | chapter_info |
90
- page = Nokogiri::HTML(my_open(chapter_info[:url]))
120
+ # fix the url to be absolute
121
+ full_url = @uri.join(chapter_info[:url]).to_s
122
+ chapter_info[:url] = full_url
123
+ page = Nokogiri::HTML(my_open(full_url))
91
124
  yielder.yield self.class.chapter_class.new(chapter_info, page, number, self)
92
125
  number += 1
93
126
  end
@@ -143,9 +176,10 @@ class ImageDownloader
143
176
  page_name = self.class.format_page_name(page, chapter, manga)
144
177
  page_abs_path = chapter_dir.join(page_name).to_s
145
178
  File.open(page_abs_path, 'wb') do | f |
146
- safe_uri = URI.encode(page.image_url, '[]')
147
- open(safe_uri, 'rb') do | image |
148
- f.write(image.read)
179
+ open(page.clean_image_url) do | image |
180
+ # TODO: check if copy_stream avoids allocating the whole image in
181
+ # memory before starting to flush it
182
+ IO.copy_stream(image, f)
149
183
  end
150
184
  end
151
185
  end
@@ -0,0 +1,44 @@
1
+ require 'yamd'
2
+
3
+ class MangaherePage < PageCrawler
4
+ def image_url
5
+ @parsed_html.at_css('#viewer a img')['src']
6
+ end
7
+ end
8
+
9
+ class MangahereChapter < ChapterCrawler
10
+ def self.page_class
11
+ MangaherePage
12
+ end
13
+
14
+ def pages_info
15
+ # there's no need of a lazy enumerator here, no IO action is taken
16
+ page_options = @parsed_html.at_css('.prew_page + select').css('option')
17
+ page_urls = []
18
+ page_options.each do | option |
19
+ page_urls << { url: option['value'] }
20
+ end
21
+ page_urls
22
+ end
23
+
24
+ def name
25
+ @custom_data[:name]
26
+ end
27
+ end
28
+
29
+ class MangahereCrawler < MangaCrawler
30
+ def self.chapter_class
31
+ MangahereChapter
32
+ end
33
+
34
+ def chapters_info
35
+ url = URI.join(self.url, @parsed_html.at_css('a.button.green')['href'])
36
+ [{ name: 'OnlyChapter',
37
+ url: url }]
38
+ end
39
+
40
+ def name
41
+ @parsed_html.at_css('h1').text.strip
42
+ end
43
+ end
44
+
@@ -0,0 +1,44 @@
1
+ require 'yamd'
2
+
3
+ class HentaiCafePage < PageCrawler
4
+ def image_url
5
+ @parsed_html.at_css('#page img')['src']
6
+ end
7
+ end
8
+
9
+ class HentaiCafeChapter < ChapterCrawler
10
+ def self.page_class
11
+ HentaiCafePage
12
+ end
13
+
14
+ def pages_info
15
+ # there's no need of a lazy enumerator here, no IO action is taken
16
+ page_list = @parsed_html.at_css('ul.dropdown').css('li')
17
+ page_urls = []
18
+ page_list.each do | li_el |
19
+ page_urls << { url: li_el.css('a').first['href'] }
20
+ end
21
+ page_urls
22
+ end
23
+
24
+ def name
25
+ @custom_data[:name]
26
+ end
27
+ end
28
+
29
+ class HentaiCafeCrawler < MangaCrawler
30
+ def self.chapter_class
31
+ HentaiCafeChapter
32
+ end
33
+
34
+ def chapters_info
35
+ css = 'a.x-btn.x-btn-flat.x-btn-rounded.x-btn-large'
36
+ [{ name: 'OnlyChapter',
37
+ url: @parsed_html.at_css(css)['href'] }]
38
+ end
39
+
40
+ def name
41
+ @parsed_html.at_css('h3').text.strip
42
+ end
43
+ end
44
+
@@ -2,7 +2,7 @@ require 'yamd'
2
2
 
3
3
  class MangaherePage < PageCrawler
4
4
  def image_url
5
- @parsed_html.at_css('#viewer a img')['src']
5
+ @parsed_html.at_css('#viewer a img:not(.loadingImg)')['src']
6
6
  end
7
7
  end
8
8
 
@@ -18,7 +18,12 @@ class MangahereChapter < ChapterCrawler
18
18
  page_options.each do | option |
19
19
  page_urls << { url: option['value'] }
20
20
  end
21
- page_urls
21
+ # drop the 'featured' page at end of each chapter
22
+ if /featured/.match(page_urls.last[:url]) then
23
+ page_urls[0...-1]
24
+ else
25
+ page_urls
26
+ end
22
27
  end
23
28
 
24
29
  def name
@@ -33,9 +38,9 @@ class MangahereCrawler < MangaCrawler
33
38
 
34
39
  def chapters_info
35
40
  @parsed_html.css('.detail_list ul li a').reverse.map do | chapter_link |
36
- { name: chapter_link.text.strip,
37
- url: chapter_link['href']
38
- }
41
+ { name: chapter_link.text.strip,
42
+ url: chapter_link['href']
43
+ }
39
44
  end
40
45
  end
41
46
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yamd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henrique Becker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-25 00:00:00.000000000 Z
11
+ date: 2018-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.9'
75
+ version: '2.1'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.9'
82
+ version: '2.1'
83
83
  description: 'This gem offers: classes to subclass and create a manga site crawler;
84
84
  a dowloader to use with these classes; some site-specific scripts.'
85
85
  email: henriquebecker91@gmail.com
@@ -91,11 +91,13 @@ files:
91
91
  - bin/yamd
92
92
  - lib/yamd.rb
93
93
  - lib/yamd/fakku.rb
94
+ - lib/yamd/gehentai.rb
95
+ - lib/yamd/hentaicafe.rb
94
96
  - lib/yamd/mangafox.rb
95
97
  - lib/yamd/mangahere.rb
96
98
  homepage: http://rubygems.org/gems/yamd
97
99
  licenses:
98
- - Public domain
100
+ - Unlicense
99
101
  metadata: {}
100
102
  post_install_message:
101
103
  rdoc_options: []
@@ -113,7 +115,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
115
  version: '0'
114
116
  requirements: []
115
117
  rubyforge_project:
116
- rubygems_version: 2.4.5.1
118
+ rubygems_version: 2.6.13
117
119
  signing_key:
118
120
  specification_version: 4
119
121
  summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writting manga