mangdown 0.10.2 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0fcefd9f91de261d15fdb0c165ec5be1852c9507
4
- data.tar.gz: 23b77f0de9dfd3b57bea6d7d23a2d63ed51165b6
3
+ metadata.gz: b33eb9d7a4f9548de2e7317602f8d2bfff7e2a29
4
+ data.tar.gz: d7bc3c791d10bfac54b24e212e382cddaab04923
5
5
  SHA512:
6
- metadata.gz: fffa3aca22c07425b59007d0d50500a01f28d8292763494ee2518e07348d98f1f8a921e54914db654b297be3c38ce99dae0c7c7cd2d2d87e7b6d3e3aead7acff
7
- data.tar.gz: 69c6e148cfefb9dd5906860ece93b5d8ead402b4ee71814cabcb33bbb991b6a81af747e4b3d74d2977561a467b3de5fee45c25181ec340891e02b23b1360c615
6
+ metadata.gz: 54deffd02793da865629ad2e655036fe014b1d9c532521b4c16ae75bebd2baecc8959fa2c7429f639dc5613df076ab895095fbf6bcfb6cde01bdd9bdc7a8834b
7
+ data.tar.gz: a1e6274cde3cab1e2b2c5c51fa0e33f3b880a7f2f811c4a758ce1e45651523aa2cf6acabd94555340db033605f7cedbf2d7b891bfc14f011155557de10e0f34b
@@ -0,0 +1,42 @@
1
+ module Mangdown
2
+ class Mangafox < Adapter::Base
3
+ Mangdown::ADAPTERS << self
4
+
5
+ def initialize(uri, doc)
6
+ super
7
+ @manga_list_css = 'div.manga_list li a'
8
+ @chapter_list_css = 'a.tips'
9
+ @root = 'http://mangafox.me'
10
+ @manga_list_uri = "#{@root}/manga"
11
+ @manga_link_prefix = ''
12
+ @reverse_chapters = true
13
+ @manga_uri_regex =
14
+ /#{@root}\/manga\/[^\/]+?\//i
15
+ @chapter_uri_regex =
16
+ /#{@manga_uri_regex}(v\d+\/)?(c\d+\/)(1\.html)/i
17
+ @page_uri_regex = /.+\.(png|jpg|jpeg)$/i
18
+ end
19
+
20
+ def build_page_uri(uri, manga, chapter, page_num)
21
+ uri.sub(/\d+\.html/, "#{page_num}.html")
22
+ end
23
+
24
+ def num_pages
25
+ doc.css('select')[1].css('option').length - 1
26
+ end
27
+
28
+ def page_image_src
29
+ page_image[:src]
30
+ end
31
+
32
+ def page_image_name
33
+ doc.css('select')[1].css('option[selected]').first().text
34
+ .rjust(3,'0')
35
+ end
36
+
37
+ private
38
+ def page_image
39
+ doc.css('img')[0]
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,46 @@
1
+ module Mangdown
2
+ class Mangahere < Adapter::Base
3
+ Mangdown::ADAPTERS << self
4
+
5
+ def initialize(uri, doc)
6
+ super
7
+ @root = 'http://www.mangahere.co'
8
+ @manga_list_css = 'a.manga_info'
9
+ @chapter_list_css = '.detail_list ul a'
10
+ @manga_list_uri = "#{@root}/mangalist/"
11
+ @manga_link_prefix = ''
12
+ @reverse_chapters = true
13
+ @manga_uri_regex =
14
+ /#{@root}\/manga\/([^\/]+\/)?/i
15
+ @chapter_uri_regex =
16
+ /#{@manga_uri_regex}(v\d+\/)?(c\d+\/)(1\.html)?/i
17
+ @page_uri_regex = /.+\.(png|jpg|jpeg)$/i
18
+ end
19
+
20
+ def build_page_uri(uri, manga, chapter, page_num)
21
+ if page_num == 1
22
+ uri
23
+ else
24
+ uri.sub(/\/[^\/]*$/, "/#{page_num}.html")
25
+ end
26
+ end
27
+
28
+ def num_pages
29
+ doc.css('.go_page span.right select')[0].css('option').length
30
+ end
31
+
32
+ def page_image_src
33
+ page_image[:src].sub(/\?.*$/, '')
34
+ end
35
+
36
+ def page_image_name
37
+ doc.css('.go_page span.right select')[0].css('option[selected]')
38
+ .text.rjust(3, '0')
39
+ end
40
+
41
+ private
42
+ def page_image
43
+ doc.css('.read_img img')[0]
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,13 @@
1
+ require_relative 'mangareader'
2
+
3
+ module Mangdown
4
+ class Mangapanda < Mangareader
5
+ Mangdown::ADAPTERS << self
6
+
7
+ def initialize(uri, doc)
8
+ @root = 'http://www.mangapanda.com'
9
+ super
10
+ end
11
+ end
12
+ end
13
+
@@ -0,0 +1,45 @@
1
+ module Mangdown
2
+ class Mangareader < Adapter::Base
3
+ Mangdown::ADAPTERS << self
4
+
5
+ def initialize(uri, doc)
6
+ super
7
+ @root ||= 'http://www.mangareader.net'
8
+ @manga_list_css = 'ul.series_alpha li a'
9
+ @chapter_list_css = 'div#chapterlist td a'
10
+ @manga_list_uri = "#{@root}/alphabetical"
11
+ @manga_link_prefix = @root
12
+ @reverse_chapters = false
13
+ @manga_uri_regex =
14
+ /#{@root}(\/\d+)?(\/[^\/]+)(\.html)?/i
15
+ @chapter_uri_regex =
16
+ /#{@root}(\/[^\/]+){1,2}\/(\d+|chapter-\d+\.html)/i
17
+ @page_uri_regex = /.+\.(png|jpg|jpeg)$/i
18
+ end
19
+
20
+ def build_page_uri(uri, manga, chapter, page_num)
21
+ "#{root}/#{manga.gsub(' ', '-')}/#{chapter}/#{page_num}"
22
+ end
23
+
24
+ def num_pages
25
+ doc.css('select')[1].css('option').length
26
+ end
27
+
28
+ def page_image_src
29
+ page_image[:src]
30
+ end
31
+
32
+ def page_image_name
33
+ page_image[:alt].sub(/([^\d]*)(\d+)(\.\w+)?$/) {
34
+ "#{Regexp.last_match[1]}" +
35
+ "#{Regexp.last_match[2].to_s.rjust(3, '0')}"
36
+ }
37
+ end
38
+
39
+ private
40
+ def page_image
41
+ doc.css('img')[0]
42
+ end
43
+ end
44
+ end
45
+
@@ -0,0 +1,44 @@
1
+ module Mangdown
2
+ class Wiemanga < Adapter::Base
3
+ Mangdown::ADAPTERS << self
4
+
5
+ def initialize(uri, doc)
6
+ super
7
+ @root = 'http://www.wiemanga.com'
8
+ @manga_list_uri =
9
+ "#{@root}/search/?name_sel=contain&author_sel=contain" +
10
+ "&completed_series=either"
11
+ @manga_list_css = 'a.resultbookname'
12
+ @chapter_list_css =
13
+ '.chapterlist tr:not(:first-child) .col1 a'
14
+ @manga_link_prefix = ''
15
+ @reverse_chapters = true
16
+ @manga_uri_regex = /#{@root}\/manga\/([^\/]+)(\.html)?/i
17
+ @chapter_uri_regex =
18
+ /#{@root}\/chapter\/([^\/]+)\/(\d+)(\/|\.html)?/i
19
+ @page_uri_regex = /.+\.(png|jpg|jpeg)$/i
20
+ end
21
+
22
+ def build_page_uri(uri, manga, chapter, page_num)
23
+ "#{uri}-#{page_num}.html"
24
+ end
25
+
26
+ def num_pages
27
+ doc.css('select#page')[0].css('option').length
28
+ end
29
+
30
+ def page_image_src
31
+ page_image[:src]
32
+ end
33
+
34
+ def page_image_name
35
+ doc.css('select#page')[0].css('option[selected]')[0]
36
+ .text.rjust(3, '0')
37
+ end
38
+
39
+ private
40
+ def page_image
41
+ doc.css('img#comicpic')[0]
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,85 @@
1
+ module Mangdown
2
+ module Adapter
3
+ class Base
4
+
5
+ attr_reader :root
6
+ def initialize(uri, doc)
7
+ @uri, @doc = uri, doc
8
+ #@root = ''
9
+ #@manga_list_css = ''
10
+ #@chapter_list_css = ''
11
+ #@manga_list_uri = ''
12
+ #@manga_link_prefix = ''
13
+ #@reverse_chapters = true || false
14
+ #@manga_uri_regex = /.*/i
15
+ #@chapter_uri_regex = /.*/i
16
+ #@page_uri_regex = /.*/i
17
+ end
18
+
19
+ def type
20
+ self.class.to_s.split('::').last.downcase.to_sym
21
+ end
22
+
23
+ # Must return true/false if uri represents a manga for adapter
24
+ def is_manga?(uri = @uri)
25
+ uri.slice(@manga_uri_regex) == uri
26
+ end
27
+
28
+ # Must return true/false if uri represents a chapter for adapter
29
+ def is_chapter?(uri = @uri)
30
+ uri.slice(@chapter_uri_regex) == uri
31
+ end
32
+
33
+ # Must return true/false if uri represents a page for adapter
34
+ def is_page?(uri = @uri)
35
+ uri.slice(@page_uri_regex) == uri
36
+ end
37
+
38
+ # Must return a uri for a page given the arguments
39
+ def build_page_uri(uri, manga, chapter, page_num)
40
+ end
41
+
42
+ # Must return the number of pages for a chapter
43
+ def num_pages
44
+ end
45
+
46
+ # Must return the src for the page image
47
+ def page_image_src
48
+ end
49
+
50
+ # Must return the name of the page image
51
+ def page_image_name
52
+ end
53
+
54
+ # If no block given, must return an array of arrays
55
+ # [manga_uri, manga_name, adapter_type]
56
+ # If block given, then the block may alter this array
57
+ # Only valid mangas should be returned (using is_manga?(uri))
58
+ def manga_list
59
+ doc.css(@manga_list_css).map { |a|
60
+ manga = ["#{@manga_link_prefix}#{a[:href]}",a.text.strip,type]
61
+ next(nil) unless is_manga?(manga.first)
62
+ block_given? ? yield(manga) : manga
63
+ }.compact
64
+ end
65
+
66
+ # If no block given, must return an array of arrays
67
+ # [chapter_uri, chapter_name, adapter_type]
68
+ # If block given, then the block may alter this array
69
+ # Only valid chapters should be returned (using is_chapter?(uri))
70
+ def manga_chapters
71
+ chapters = doc.css(@chapter_list_css).map { |a|
72
+ chapter = [(root + a[:href].sub(root, '')),a.text.strip,type]
73
+ next(nil) unless is_chapter?(chapter.first)
74
+ block_given? ? yield(chapter) : chapter
75
+ }.compact
76
+ @reverse_chapters ? chapters.reverse : chapters
77
+ end
78
+
79
+ private
80
+ def doc
81
+ @doc ||= Tools.get_doc(@uri)
82
+ end
83
+ end
84
+ end
85
+ end
@@ -39,110 +39,65 @@ module Mangdown
39
39
 
40
40
  # download all pages in a chapter
41
41
  def download_to(dir)
42
- dir = File.expand_path(dir + '/' + @name)
42
+ dir = Tools.relative_or_absolute_path(dir, @name)
43
+ pages = map {|page| page.to_page}
44
+ failed = []
45
+ succeeded = []
46
+
43
47
  Dir.mkdir(dir) unless Dir.exists?(dir)
44
-
45
- # Limit active threads to 20
46
- each_slice(20) do |slice|
47
- threads = []
48
- slice.each do |page|
49
- threads << Thread.new(page) do |this_page|
50
- this_page.to_page.download_to(dir)
48
+ Tools.hydra_streaming(pages) do |stage, page, data=nil|
49
+ case stage
50
+ when :failed
51
+ failed << page
52
+ when :succeeded
53
+ succeeded << page
54
+ when :before
55
+ path = page.file_path(dir)
56
+ !File.exist?(path)
57
+ when :body
58
+ unless failed.include?(page)
59
+ path = page.file_path(dir)
60
+ File.open(path, 'ab') { |file| file.write(data) }
51
61
  end
52
- end
53
- threads.each {|thread| thread.join}
54
- end
55
- end
56
-
57
- private
58
- # get page objects for all pages in a chapter
59
- def get_pages
60
- threads = []
61
-
62
- num_pages = get_num_pages(Tools.get_doc(uri))
63
- 1.upto(num_pages) do |num|
64
- threads << Thread.new(num) do |this_num|
65
- Tools.no_time_out do
66
- tries = 0
67
- until doc = get_page_doc(this_num) || tries > 2
68
- tries += 1
69
- end
70
- return unless doc
71
- @pages << get_page(doc)
72
- end
62
+ when :complete
63
+ unless failed.include?(page)
64
+ path = page.file_path(dir)
65
+ FileUtils.mv(path, "#{path}.#{Tools.file_type(path)}")
73
66
  end
74
67
  end
75
-
76
- threads.each {|thread| thread.join}
77
- end
78
-
79
- # get the number of pages in a chapter
80
- def get_num_pages(doc)
81
- # the select is a dropdown menu of chapter pages
82
- doc.css('select')[1].css('option').length
83
- end
84
-
85
- end
86
-
87
- # mangareader chapter object
88
- class MRChapter < Chapter
89
- private
90
- # get the doc for a given page number
91
- def get_page_doc(num)
92
- root = @properties.root
93
- manga = @manga.gsub(' ', '-')
94
- uri_str = "#{root}/#{manga}/#{@chapter}/#{num}"
95
- page_uri = Mangdown::Uri.new(uri_str).downcase
96
-
97
- Tools.get_doc(page_uri)
98
- rescue SocketError => error
99
- STDERR.puts( "#{error.message} | #{name} | #{num}" )
100
68
  end
101
-
102
- # get the page uri and name
103
- def get_page(doc)
104
- image = doc.css('img')[0]
69
+ FileUtils.rm_rf(dir) if succeeded.empty?
105
70
 
106
- MDHash.new(
107
- uri: image['src'],
108
- name: (image['alt'] + ".jpg"),
109
- site: @properties.type,
110
- )
111
- rescue NoMethodError => error
112
- puts 'doc was ' + doc.class
113
- end
114
- end
71
+ !succeeded.empty?
72
+ end
115
73
 
116
- # mangafox chapter object
117
- class MFChapter < Chapter
118
74
  private
119
- # get the doc for a given page number
120
- def get_page_doc(num)
121
- Tools.get_doc(
122
- Mangdown::Uri.new(
123
- @properties.root +
124
- "/manga/#{@manga.gsub(' ', '_')}/c#{@chapter}/" +
125
- "#{num}.html"
126
- ).downcase
127
- )
128
- rescue SocketError => error
129
- STDERR.puts( "#{error.message} | #{name} | #{num}" )
75
+ # get page objects for all pages in a chapter
76
+ def get_pages
77
+ pages = (1..@properties.num_pages).map {|num| get_page_hash(num)}
78
+
79
+ Tools.hydra(pages) do |page, body|
80
+ @pages << get_page(page.uri, Nokogiri::HTML(body))
130
81
  end
82
+ end
131
83
 
132
- # get the page name and uri
133
- def get_page(doc)
134
- image = doc.css('img')[0]
84
+ # build an MDHash holding the page uri and page number
85
+ def get_page_hash(num)
86
+ uri_str = @properties.build_page_uri(uri, @manga, @chapter, num)
135
87
 
136
- MDHash.new(
137
- uri: image[:src],
138
- name: image[:src].sub(/.+\//, ''),
139
- site: @properties.type,
140
- )
141
- end
88
+ MDHash.new(
89
+ uri: Mangdown::Uri.new(uri_str).downcase, name: num
90
+ )
91
+ end
142
92
 
143
- # get the number of pages
144
- def get_num_pages(doc)
145
- super - 1
146
- end
147
- end
93
+ # get the page name and uri
94
+ def get_page(uri, doc)
95
+ properties = Properties.new(uri, nil, doc)
96
+ MDHash.new(
97
+ uri: properties.page_image_src,
98
+ name: properties.page_image_name,
99
+ site: properties.type
100
+ )
101
+ end
102
+ end
148
103
  end
@@ -7,6 +7,14 @@ module M
7
7
  HELP_FILE_PATH = File.expand_path(
8
8
  '../../doc/help.txt', File.dirname(__FILE__)
9
9
  )
10
+ MANGA_PAGES = (1..9).map { |p|
11
+ "http://www.wiemanga.com/search/?name_sel=contain" +
12
+ "&author_sel=contain&completed_series=either&page=#{p}.html"
13
+ } +
14
+ [
15
+ 'http://www.mangareader.net/alphabetical',
16
+ 'http://mangafox.me/manga/'
17
+ ]
10
18
 
11
19
  # return a list of hash with :uri and :name of mangas found in list
12
20
  def find(search)
@@ -53,12 +61,12 @@ module M
53
61
  data = data_from_file
54
62
  return MangaList.from_data(data) if data.is_a? Array
55
63
 
56
- MangaList.new(
57
- 'http://www.mangareader.net/alphabetical',
58
- 'http://mangafox.me/manga/'
59
- ).tap { |list| File.open(path,'w+') {|f| f.write(list.to_yaml)} }
64
+ MangaList.new(*MANGA_PAGES).tap { |list|
65
+ File.open(path,'w+') {|f| f.write(list.to_yaml)}
66
+ }
60
67
  rescue Object => error
61
68
  puts "#{path} is corrupt: #{error.message}"
69
+ raise
62
70
  end
63
71
 
64
72
  # check if the search key contains letters or numbers
@@ -15,10 +15,9 @@ module Mangdown
15
15
  @name = name
16
16
  @uri = Mangdown::Uri.new(uri)
17
17
  @chapters = []
18
- @properties = Properties.new(@uri)
18
+ @properties = Properties.new(uri)
19
19
 
20
20
  get_chapters
21
- @chapters.select! { |chapter| @properties.is_chapter?(chapter) }
22
21
  end
23
22
 
24
23
  def inspect
@@ -41,8 +40,12 @@ module Mangdown
41
40
  bar = progress_bar(start, stop)
42
41
  reset(start, stop)
43
42
  loop do
44
- self.next.to_chapter.download_to(dir)
45
- bar.increment!
43
+ chapter = self.next.to_chapter
44
+ if chapter.download_to(dir)
45
+ bar.increment!
46
+ else
47
+ STDERR.puts("error: #{chapter.name} was not downloaded")
48
+ end
46
49
  end
47
50
  end
48
51
 
@@ -70,19 +73,9 @@ module Mangdown
70
73
 
71
74
  # push MDHashes of manga chapters onto @chapters
72
75
  def get_chapters
73
- doc = Tools.get_doc(@uri)
74
- root = @properties.root
75
-
76
- #get the link with chapter name and uri
77
- doc.css(@properties.manga_css_klass).each do |chapter|
78
- @chapters << MDHash.new(
79
- uri: (root + chapter[:href].sub(root, '')),
80
- name: chapter.text,
81
- site: @properties.type,
82
- )
76
+ @chapters += @properties.manga_chapters do |uri, name, site|
77
+ MDHash.new(uri: uri, name: name, site: site)
83
78
  end
84
-
85
- @chapters.reverse! if @properties.reverse
86
79
  end
87
80
 
88
81
  def chapter_indeces(start, stop)
@@ -90,14 +83,16 @@ module Mangdown
90
83
  end
91
84
 
92
85
  def setup_download_dir!(dir)
93
- dir += "/#{name}"
94
- Dir.mkdir(dir) unless Dir.exist?(dir)
95
- dir
86
+ "#{dir}/#{name}".tap {|dir| Dir.mkdir(dir) unless Dir.exist?(dir)}
96
87
  end
97
88
 
98
89
  def validate_indeces!(start, stop)
99
90
  i_start, i_stop = chapter_indeces(start, stop)
100
- if i_stop < i_stop
91
+ if i_start.nil? || i_stop.nil?
92
+ last = chapters.length - 1
93
+ error = "This manga has chapters in the range (0..#{last})"
94
+ raise ArgumentError, error
95
+ elsif i_stop < i_stop
101
96
  error = 'Last index must be greater than first index'
102
97
  raise ArgumentError, error
103
98
  end
@@ -22,21 +22,11 @@ module Mangdown
22
22
  @mangas.map(&:to_hash).to_yaml
23
23
  end
24
24
 
25
+ private
25
26
  # get a list of mangas from the uri
26
27
  def get_mangas(uri)
27
- properties = Properties.new(uri)
28
- if properties.empty?
29
- raise ArgumentError,
30
- "Bad URI: No Properties Specified for URI <#{uri}>"
31
- end
32
-
33
- doc = Tools.get_doc(uri)
34
- # This should be put in a tool
35
- doc.css(properties.manga_list_css_klass).each do |a|
36
- @mangas << MDHash.new(
37
- uri: "#{properties.manga_link_prefix}#{a[:href]}",
38
- name: a.text
39
- )
28
+ @mangas += Properties.new(uri).manga_list do |uri, name|
29
+ MDHash.new(uri: uri, name: name)
40
30
  end
41
31
  end
42
32
  end
@@ -1,4 +1,6 @@
1
1
  module Mangdown
2
+ ADAPTERS = []
3
+
2
4
  module Equality
3
5
  def eql?(other)
4
6
  (self.name == other.name) && (self.uri == other.uri)
@@ -8,5 +10,10 @@ module Mangdown
8
10
  # puts "You may want to use :eql?"
9
11
  super
10
12
  end
13
+
14
+ # spaceship operator for sorting by name
15
+ def <=>(other)
16
+ self.name <=> other.name
17
+ end
11
18
  end
12
19
  end
@@ -2,10 +2,8 @@ module Mangdown
2
2
  class MDHash
3
3
  include Equality
4
4
 
5
- attr_reader :properties
6
-
7
5
  def initialize(options = {})
8
- @properties = Properties.new(options[:site] || options[:uri])
6
+ @properties = Properties.new(options[:uri], options[:site])
9
7
 
10
8
  @hash = {}
11
9
  [:uri, :name].each {|key| @hash[key] = options.fetch(key)}
@@ -15,7 +13,7 @@ module Mangdown
15
13
 
16
14
  # explicit conversion to manga
17
15
  def to_manga
18
- if @properties.is_manga?(self)
16
+ if @properties.is_manga?
19
17
  Manga.new(name, uri)
20
18
  else
21
19
  raise NoMethodError, 'This is not a known manga type'
@@ -24,8 +22,8 @@ module Mangdown
24
22
 
25
23
  # explicit conversion to chapter
26
24
  def to_chapter
27
- if @properties.is_chapter?(self)
28
- @properties.chapter_klass.new(name, uri)
25
+ if @properties.is_chapter?
26
+ Chapter.new(name, uri)
29
27
  else
30
28
  raise NoMethodError, 'This is not a known chapter type'
31
29
  end
@@ -33,7 +31,7 @@ module Mangdown
33
31
 
34
32
  # explicit conversion to page
35
33
  def to_page
36
- if @properties.is_page?(self)
34
+ if @properties.is_page?
37
35
  Page.new(name, uri)
38
36
  else
39
37
  raise NoMethodError, 'This is not a known page type'
@@ -50,6 +48,16 @@ module Mangdown
50
48
  @hash[:uri]
51
49
  end
52
50
 
51
+ # name writer
52
+ def name=(other)
53
+ @hash[:name] = other
54
+ end
55
+
56
+ # uri writer
57
+ def uri=(other)
58
+ @hash[:uri] = other
59
+ end
60
+
53
61
  def [](key)
54
62
  @hash[key]
55
63
  end