mangdown 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0fcefd9f91de261d15fdb0c165ec5be1852c9507
4
- data.tar.gz: 23b77f0de9dfd3b57bea6d7d23a2d63ed51165b6
3
+ metadata.gz: b33eb9d7a4f9548de2e7317602f8d2bfff7e2a29
4
+ data.tar.gz: d7bc3c791d10bfac54b24e212e382cddaab04923
5
5
  SHA512:
6
- metadata.gz: fffa3aca22c07425b59007d0d50500a01f28d8292763494ee2518e07348d98f1f8a921e54914db654b297be3c38ce99dae0c7c7cd2d2d87e7b6d3e3aead7acff
7
- data.tar.gz: 69c6e148cfefb9dd5906860ece93b5d8ead402b4ee71814cabcb33bbb991b6a81af747e4b3d74d2977561a467b3de5fee45c25181ec340891e02b23b1360c615
6
+ metadata.gz: 54deffd02793da865629ad2e655036fe014b1d9c532521b4c16ae75bebd2baecc8959fa2c7429f639dc5613df076ab895095fbf6bcfb6cde01bdd9bdc7a8834b
7
+ data.tar.gz: a1e6274cde3cab1e2b2c5c51fa0e33f3b880a7f2f811c4a758ce1e45651523aa2cf6acabd94555340db033605f7cedbf2d7b891bfc14f011155557de10e0f34b
@@ -0,0 +1,42 @@
1
module Mangdown
  # Site adapter for mangafox.me. Supplies the CSS selectors and URI patterns
  # read by Adapter::Base, plus the page-level scraping for this site.
  class Mangafox < Adapter::Base
    # Append this class to Mangdown::ADAPTERS when the file is loaded.
    Mangdown::ADAPTERS << self

    # @param uri [String] URI handled by this adapter instance
    # @param doc [Object, nil] document forwarded unchanged to Adapter::Base;
    #   it is queried below through #css
    def initialize(uri, doc)
      super
      @manga_list_css    = 'div.manga_list li a'
      @chapter_list_css  = 'a.tips'
      @root              = 'http://mangafox.me'
      @manga_list_uri    = "#{@root}/manga"
      @manga_link_prefix = ''
      @reverse_chapters  = true
      # Regexp.escape keeps the "." in the host from matching any character.
      @manga_uri_regex   = /#{Regexp.escape(@root)}\/manga\/[^\/]+?\//i
      @chapter_uri_regex = /#{@manga_uri_regex}(v\d+\/)?(c\d+\/)(1\.html)/i
      @page_uri_regex    = /.+\.(png|jpg|jpeg)$/i
    end

    # Returns +uri+ with its first "<digits>.html" replaced by
    # "<page_num>.html". +manga+ and +chapter+ are not used by this adapter.
    def build_page_uri(uri, manga, chapter, page_num)
      uri.sub(/\d+\.html/, "#{page_num}.html")
    end

    # Number of <option> entries in the document's second <select>, minus one.
    def num_pages
      doc.css('select')[1].css('option').length - 1
    end

    # The src attribute of the page image.
    def page_image_src
      page_image[:src]
    end

    # Text of the selected <option> in the second <select>, left-padded with
    # zeros to three characters.
    def page_image_name
      selected = doc.css('select')[1].css('option[selected]').first
      selected.text.rjust(3, '0')
    end

    private

    # First <img> element in the document.
    def page_image
      doc.css('img')[0]
    end
  end
end
@@ -0,0 +1,46 @@
1
module Mangdown
  # Site adapter for www.mangahere.co. Supplies the CSS selectors and URI
  # patterns read by Adapter::Base, plus the page-level scraping for this site.
  class Mangahere < Adapter::Base
    # Append this class to Mangdown::ADAPTERS when the file is loaded.
    Mangdown::ADAPTERS << self

    # CSS path of the page-number dropdown used by num_pages/page_image_name.
    PAGE_SELECT_CSS = '.go_page span.right select'

    # @param uri [String] URI handled by this adapter instance
    # @param doc [Object, nil] document forwarded unchanged to Adapter::Base;
    #   it is queried below through #css
    def initialize(uri, doc)
      super
      @root              = 'http://www.mangahere.co'
      @manga_list_css    = 'a.manga_info'
      @chapter_list_css  = '.detail_list ul a'
      @manga_list_uri    = "#{@root}/mangalist/"
      @manga_link_prefix = ''
      @reverse_chapters  = true
      # Regexp.escape keeps the "." characters in the host from matching
      # any character.
      @manga_uri_regex   = /#{Regexp.escape(@root)}\/manga\/([^\/]+\/)?/i
      @chapter_uri_regex = /#{@manga_uri_regex}(v\d+\/)?(c\d+\/)(1\.html)?/i
      @page_uri_regex    = /.+\.(png|jpg|jpeg)$/i
    end

    # Page 1 is +uri+ itself; for any other page everything after the last
    # "/" of +uri+ is replaced with "<page_num>.html".
    # +manga+ and +chapter+ are not used by this adapter.
    def build_page_uri(uri, manga, chapter, page_num)
      return uri if page_num == 1

      uri.sub(/\/[^\/]*$/, "/#{page_num}.html")
    end

    # Number of <option> entries in the first page-number dropdown.
    def num_pages
      doc.css(PAGE_SELECT_CSS)[0].css('option').length
    end

    # The src attribute of the page image with any "?query" suffix removed.
    def page_image_src
      page_image[:src].sub(/\?.*$/, '')
    end

    # Text of the selected <option> in the page-number dropdown, left-padded
    # with zeros to three characters.
    def page_image_name
      doc.css(PAGE_SELECT_CSS)[0].css('option[selected]')
         .text.rjust(3, '0')
    end

    private

    # First <img> found under ".read_img".
    def page_image
      doc.css('.read_img img')[0]
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require_relative 'mangareader'
2
+
3
module Mangdown
  # Adapter for www.mangapanda.com. Inherits everything from Mangareader and
  # only supplies a different site root.
  class Mangapanda < Mangareader
    # Append this class to Mangdown::ADAPTERS when the file is loaded.
    Mangdown::ADAPTERS << self

    # @param uri [String] URI handled by this adapter instance
    # @param doc [Object, nil] document passed on to the parent initializer
    def initialize(uri, doc)
      # Assigned before super so the value is already set when the parent
      # initializer runs.
      @root = 'http://www.mangapanda.com'
      super(uri, doc)
    end
  end
end
13
+
@@ -0,0 +1,45 @@
1
module Mangdown
  # Site adapter for www.mangareader.net. Supplies the CSS selectors and URI
  # patterns read by Adapter::Base, plus the page-level scraping for this site.
  class Mangareader < Adapter::Base
    # Append this class to Mangdown::ADAPTERS when the file is loaded.
    Mangdown::ADAPTERS << self

    # @param uri [String] URI handled by this adapter instance
    # @param doc [Object, nil] document forwarded unchanged to Adapter::Base;
    #   it is queried below through #css
    def initialize(uri, doc)
      super
      # ||= keeps a @root that was assigned before this initializer ran
      # (for example by a subclass).
      @root ||= 'http://www.mangareader.net'
      @manga_list_css    = 'ul.series_alpha li a'
      @chapter_list_css  = 'div#chapterlist td a'
      @manga_list_uri    = "#{@root}/alphabetical"
      @manga_link_prefix = @root
      @reverse_chapters  = false
      # Regexp.escape keeps the "." characters in the host from matching
      # any character.
      escaped_root = Regexp.escape(@root)
      @manga_uri_regex =
        /#{escaped_root}(\/\d+)?(\/[^\/]+)(\.html)?/i
      @chapter_uri_regex =
        /#{escaped_root}(\/[^\/]+){1,2}\/(\d+|chapter-\d+\.html)/i
      @page_uri_regex = /.+\.(png|jpg|jpeg)$/i
    end

    # Builds "<root>/<manga>/<chapter>/<page_num>" with the spaces in +manga+
    # turned into dashes. +uri+ is not used by this adapter.
    def build_page_uri(uri, manga, chapter, page_num)
      "#{root}/#{manga.tr(' ', '-')}/#{chapter}/#{page_num}"
    end

    # Number of <option> entries in the document's second <select>.
    def num_pages
      doc.css('select')[1].css('option').length
    end

    # The src attribute of the page image.
    def page_image_src
      page_image[:src]
    end

    # The image's alt text with its trailing number left-padded with zeros to
    # three digits; an extension following that number is dropped.
    def page_image_name
      page_image[:alt].sub(/([^\d]*)(\d+)(\.\w+)?$/) do
        "#{Regexp.last_match(1)}#{Regexp.last_match(2).rjust(3, '0')}"
      end
    end

    private

    # First <img> element in the document.
    def page_image
      doc.css('img')[0]
    end
  end
end
45
+
@@ -0,0 +1,44 @@
1
module Mangdown
  # Site adapter for www.wiemanga.com. Supplies the CSS selectors and URI
  # patterns read by Adapter::Base, plus the page-level scraping for this site.
  class Wiemanga < Adapter::Base
    # Append this class to Mangdown::ADAPTERS when the file is loaded.
    Mangdown::ADAPTERS << self

    # @param uri [String] URI handled by this adapter instance
    # @param doc [Object, nil] document forwarded unchanged to Adapter::Base;
    #   it is queried below through #css
    def initialize(uri, doc)
      super
      @root = 'http://www.wiemanga.com'
      @manga_list_uri =
        "#{@root}/search/?name_sel=contain&author_sel=contain" +
        "&completed_series=either"
      @manga_list_css = 'a.resultbookname'
      @chapter_list_css =
        '.chapterlist tr:not(:first-child) .col1 a'
      @manga_link_prefix = ''
      @reverse_chapters = true
      # Regexp.escape keeps the "." characters in the host from matching
      # any character.
      escaped_root = Regexp.escape(@root)
      @manga_uri_regex = /#{escaped_root}\/manga\/([^\/]+)(\.html)?/i
      @chapter_uri_regex =
        /#{escaped_root}\/chapter\/([^\/]+)\/(\d+)(\/|\.html)?/i
      @page_uri_regex = /.+\.(png|jpg|jpeg)$/i
    end

    # Appends "-<page_num>.html" to +uri+.
    # +manga+ and +chapter+ are not used by this adapter.
    def build_page_uri(uri, manga, chapter, page_num)
      "#{uri}-#{page_num}.html"
    end

    # Number of <option> entries in the first "select#page" element.
    def num_pages
      doc.css('select#page')[0].css('option').length
    end

    # The src attribute of the page image.
    def page_image_src
      page_image[:src]
    end

    # Text of the first selected <option> in "select#page", left-padded with
    # zeros to three characters.
    def page_image_name
      doc.css('select#page')[0].css('option[selected]')[0]
         .text.rjust(3, '0')
    end

    private

    # First "img#comicpic" element in the document.
    def page_image
      doc.css('img#comicpic')[0]
    end
  end
end
@@ -0,0 +1,85 @@
1
module Mangdown
  module Adapter
    # Template for site adapters. A subclass calls +super+ from #initialize
    # and then assigns the instance variables listed there; the list/URI
    # helpers below read them. The page-level methods are stubs that return
    # nil until a subclass overrides them.
    class Base
      attr_reader :root

      # @param uri [String] URI this adapter instance describes
      # @param doc [Object, nil] document for +uri+; when nil/false the
      #   private #doc reader loads it through Tools.get_doc on first use
      #
      # Instance variables a subclass is expected to assign:
      #   @root              = ''
      #   @manga_list_css    = ''
      #   @chapter_list_css  = ''
      #   @manga_list_uri    = ''
      #   @manga_link_prefix = ''
      #   @reverse_chapters  = true || false
      #   @manga_uri_regex   = /.*/i
      #   @chapter_uri_regex = /.*/i
      #   @page_uri_regex    = /.*/i
      def initialize(uri, doc)
        @uri = uri
        @doc = doc
      end

      # Adapter type: the downcased, unqualified class name as a symbol.
      def type
        self.class.to_s.split('::').last.downcase.to_sym
      end

      # Must return true/false if uri represents a manga for adapter.
      # True only when @manga_uri_regex matches the whole of +uri+.
      def is_manga?(uri = @uri)
        uri.slice(@manga_uri_regex) == uri
      end

      # Must return true/false if uri represents a chapter for adapter.
      # True only when @chapter_uri_regex matches the whole of +uri+.
      def is_chapter?(uri = @uri)
        uri.slice(@chapter_uri_regex) == uri
      end

      # Must return true/false if uri represents a page for adapter.
      # True only when @page_uri_regex matches the whole of +uri+.
      def is_page?(uri = @uri)
        uri.slice(@page_uri_regex) == uri
      end

      # Must return a uri for a page given the arguments
      def build_page_uri(uri, manga, chapter, page_num)
      end

      # Must return the number of pages for a chapter
      def num_pages
      end

      # Must return the src for the page image
      def page_image_src
      end

      # Must return the name of the page image
      def page_image_name
      end

      # If no block given, must return an array of arrays
      # [manga_uri, manga_name, adapter_type]
      # If block given, then the block may alter this array
      # Only valid mangas should be returned (using is_manga?(uri))
      def manga_list
        doc.css(@manga_list_css).map { |a|
          manga = ["#{@manga_link_prefix}#{a[:href]}", a.text.strip, type]
          next(nil) unless is_manga?(manga.first)
          block_given? ? yield(manga) : manga
        }.compact
      end

      # If no block given, must return an array of arrays
      # [chapter_uri, chapter_name, adapter_type]
      # If block given, then the block may alter this array
      # Only valid chapters should be returned (using is_chapter?(uri))
      # The result is reversed when @reverse_chapters is truthy.
      def manga_chapters
        chapters = doc.css(@chapter_list_css).map { |a|
          href = a[:href]
          # An anchor without an href cannot be a chapter link; skip it
          # instead of failing on nil.
          next(nil) unless href
          # Strip root first so absolute and relative hrefs both end up
          # prefixed with root exactly once.
          chapter = [root + href.sub(root, ''), a.text.strip, type]
          next(nil) unless is_chapter?(chapter.first)
          block_given? ? yield(chapter) : chapter
        }.compact
        @reverse_chapters ? chapters.reverse : chapters
      end

      private

      # The document given to #initialize, or one loaded on first use with
      # Tools.get_doc(@uri).
      def doc
        @doc ||= Tools.get_doc(@uri)
      end
    end
  end
end
@@ -39,110 +39,65 @@ module Mangdown
39
39
 
40
40
  # download all pages in a chapter
41
41
  def download_to(dir)
42
- dir = File.expand_path(dir + '/' + @name)
42
+ dir = Tools.relative_or_absolute_path(dir, @name)
43
+ pages = map {|page| page.to_page}
44
+ failed = []
45
+ succeeded = []
46
+
43
47
  Dir.mkdir(dir) unless Dir.exists?(dir)
44
-
45
- # Limit active threads to 20
46
- each_slice(20) do |slice|
47
- threads = []
48
- slice.each do |page|
49
- threads << Thread.new(page) do |this_page|
50
- this_page.to_page.download_to(dir)
48
+ Tools.hydra_streaming(pages) do |stage, page, data=nil|
49
+ case stage
50
+ when :failed
51
+ failed << page
52
+ when :succeeded
53
+ succeeded << page
54
+ when :before
55
+ path = page.file_path(dir)
56
+ !File.exist?(path)
57
+ when :body
58
+ unless failed.include?(page)
59
+ path = page.file_path(dir)
60
+ File.open(path, 'ab') { |file| file.write(data) }
51
61
  end
52
- end
53
- threads.each {|thread| thread.join}
54
- end
55
- end
56
-
57
- private
58
- # get page objects for all pages in a chapter
59
- def get_pages
60
- threads = []
61
-
62
- num_pages = get_num_pages(Tools.get_doc(uri))
63
- 1.upto(num_pages) do |num|
64
- threads << Thread.new(num) do |this_num|
65
- Tools.no_time_out do
66
- tries = 0
67
- until doc = get_page_doc(this_num) || tries > 2
68
- tries += 1
69
- end
70
- return unless doc
71
- @pages << get_page(doc)
72
- end
62
+ when :complete
63
+ unless failed.include?(page)
64
+ path = page.file_path(dir)
65
+ FileUtils.mv(path, "#{path}.#{Tools.file_type(path)}")
73
66
  end
74
67
  end
75
-
76
- threads.each {|thread| thread.join}
77
- end
78
-
79
- # get the number of pages in a chapter
80
- def get_num_pages(doc)
81
- # the select is a dropdown menu of chapter pages
82
- doc.css('select')[1].css('option').length
83
- end
84
-
85
- end
86
-
87
- # mangareader chapter object
88
- class MRChapter < Chapter
89
- private
90
- # get the doc for a given page number
91
- def get_page_doc(num)
92
- root = @properties.root
93
- manga = @manga.gsub(' ', '-')
94
- uri_str = "#{root}/#{manga}/#{@chapter}/#{num}"
95
- page_uri = Mangdown::Uri.new(uri_str).downcase
96
-
97
- Tools.get_doc(page_uri)
98
- rescue SocketError => error
99
- STDERR.puts( "#{error.message} | #{name} | #{num}" )
100
68
  end
101
-
102
- # get the page uri and name
103
- def get_page(doc)
104
- image = doc.css('img')[0]
69
+ FileUtils.rm_rf(dir) if succeeded.empty?
105
70
 
106
- MDHash.new(
107
- uri: image['src'],
108
- name: (image['alt'] + ".jpg"),
109
- site: @properties.type,
110
- )
111
- rescue NoMethodError => error
112
- puts 'doc was ' + doc.class
113
- end
114
- end
71
+ !succeeded.empty?
72
+ end
115
73
 
116
- # mangafox chapter object
117
- class MFChapter < Chapter
118
74
  private
119
- # get the doc for a given page number
120
- def get_page_doc(num)
121
- Tools.get_doc(
122
- Mangdown::Uri.new(
123
- @properties.root +
124
- "/manga/#{@manga.gsub(' ', '_')}/c#{@chapter}/" +
125
- "#{num}.html"
126
- ).downcase
127
- )
128
- rescue SocketError => error
129
- STDERR.puts( "#{error.message} | #{name} | #{num}" )
75
+ # get page objects for all pages in a chapter
76
def get_pages
  # One MDHash per page number, from 1 up to the adapter's page count.
  page_hashes = (1..@properties.num_pages).map do |number|
    get_page_hash(number)
  end

  # Each fetched body is parsed and turned into a page entry in @pages.
  Tools.hydra(page_hashes) do |page_hash, html|
    @pages << get_page(page_hash.uri, Nokogiri::HTML(html))
  end
end
131
83
 
132
- # get the page name and uri
133
- def get_page(doc)
134
- image = doc.css('img')[0]
84
+ # build the MDHash (page uri and page number) for a given page number
85
def get_page_hash(num)
  # The adapter builds the raw page URI; it is wrapped in Mangdown::Uri and
  # downcased before being stored.
  page_uri = Mangdown::Uri.new(
    @properties.build_page_uri(uri, @manga, @chapter, num)
  ).downcase

  MDHash.new(uri: page_uri, name: num)
end
142
92
 
143
- # get the number of pages
144
- def get_num_pages(doc)
145
- super - 1
146
- end
147
- end
93
+ # get the page name and uri
94
def get_page(uri, doc)
  # Properties built from the page uri and its parsed document supply the
  # image src, image name and adapter type.
  scraped = Properties.new(uri, nil, doc)
  attributes = {
    uri: scraped.page_image_src,
    name: scraped.page_image_name,
    site: scraped.type
  }
  MDHash.new(attributes)
end
102
+ end
148
103
  end
@@ -7,6 +7,14 @@ module M
7
7
  HELP_FILE_PATH = File.expand_path(
8
8
  '../../doc/help.txt', File.dirname(__FILE__)
9
9
  )
10
# Manga listing pages: nine wiemanga search-result pages followed by the
# mangareader and mangafox index pages.
MANGA_PAGES = [
  *(1..9).map { |page_number|
    "http://www.wiemanga.com/search/?name_sel=contain" +
      "&author_sel=contain&completed_series=either&page=#{page_number}.html"
  },
  'http://www.mangareader.net/alphabetical',
  'http://mangafox.me/manga/'
]
10
18
 
11
19
  # return a list of hash with :uri and :name of mangas found in list
12
20
  def find(search)
@@ -53,12 +61,12 @@ module M
53
61
  data = data_from_file
54
62
  return MangaList.from_data(data) if data.is_a? Array
55
63
 
56
- MangaList.new(
57
- 'http://www.mangareader.net/alphabetical',
58
- 'http://mangafox.me/manga/'
59
- ).tap { |list| File.open(path,'w+') {|f| f.write(list.to_yaml)} }
64
+ MangaList.new(*MANGA_PAGES).tap { |list|
65
+ File.open(path,'w+') {|f| f.write(list.to_yaml)}
66
+ }
60
67
  rescue Object => error
61
68
  puts "#{path} is corrupt: #{error.message}"
69
+ raise
62
70
  end
63
71
 
64
72
  # check if the search key contains letters or numbers
@@ -15,10 +15,9 @@ module Mangdown
15
15
  @name = name
16
16
  @uri = Mangdown::Uri.new(uri)
17
17
  @chapters = []
18
- @properties = Properties.new(@uri)
18
+ @properties = Properties.new(uri)
19
19
 
20
20
  get_chapters
21
- @chapters.select! { |chapter| @properties.is_chapter?(chapter) }
22
21
  end
23
22
 
24
23
  def inspect
@@ -41,8 +40,12 @@ module Mangdown
41
40
  bar = progress_bar(start, stop)
42
41
  reset(start, stop)
43
42
  loop do
44
- self.next.to_chapter.download_to(dir)
45
- bar.increment!
43
+ chapter = self.next.to_chapter
44
+ if chapter.download_to(dir)
45
+ bar.increment!
46
+ else
47
+ STDERR.puts("error: #{chapter.name} was not downloaded")
48
+ end
46
49
  end
47
50
  end
48
51
 
@@ -70,19 +73,9 @@ module Mangdown
70
73
 
71
74
  # push MDHashes of manga chapters to @chapters
72
75
  def get_chapters
73
- doc = Tools.get_doc(@uri)
74
- root = @properties.root
75
-
76
- #get the link with chapter name and uri
77
- doc.css(@properties.manga_css_klass).each do |chapter|
78
- @chapters << MDHash.new(
79
- uri: (root + chapter[:href].sub(root, '')),
80
- name: chapter.text,
81
- site: @properties.type,
82
- )
76
+ @chapters += @properties.manga_chapters do |uri, name, site|
77
+ MDHash.new(uri: uri, name: name, site: site)
83
78
  end
84
-
85
- @chapters.reverse! if @properties.reverse
86
79
  end
87
80
 
88
81
  def chapter_indeces(start, stop)
@@ -90,14 +83,16 @@ module Mangdown
90
83
  end
91
84
 
92
85
  def setup_download_dir!(dir)
93
- dir += "/#{name}"
94
- Dir.mkdir(dir) unless Dir.exist?(dir)
95
- dir
86
+ "#{dir}/#{name}".tap {|dir| Dir.mkdir(dir) unless Dir.exist?(dir)}
96
87
  end
97
88
 
98
89
  def validate_indeces!(start, stop)
99
90
  i_start, i_stop = chapter_indeces(start, stop)
100
- if i_stop < i_stop
91
+ if i_start.nil? || i_stop.nil?
92
+ last = chapters.length - 1
93
+ error = "This manga has chapters in the range (0..#{last})"
94
+ raise ArgumentError, error
95
+ elsif i_stop < i_stop
101
96
  error = 'Last index must be greater than first index'
102
97
  raise ArgumentError, error
103
98
  end
@@ -22,21 +22,11 @@ module Mangdown
22
22
  @mangas.map(&:to_hash).to_yaml
23
23
  end
24
24
 
25
+ private
25
26
  # get a list of mangas from the uri
26
27
  def get_mangas(uri)
27
- properties = Properties.new(uri)
28
- if properties.empty?
29
- raise ArgumentError,
30
- "Bad URI: No Properties Specified for URI <#{uri}>"
31
- end
32
-
33
- doc = Tools.get_doc(uri)
34
- # This should be put in a tool
35
- doc.css(properties.manga_list_css_klass).each do |a|
36
- @mangas << MDHash.new(
37
- uri: "#{properties.manga_link_prefix}#{a[:href]}",
38
- name: a.text
39
- )
28
+ @mangas += Properties.new(uri).manga_list do |uri, name|
29
+ MDHash.new(uri: uri, name: name)
40
30
  end
41
31
  end
42
32
  end
@@ -1,4 +1,6 @@
1
1
  module Mangdown
2
+ ADAPTERS = []
3
+
2
4
  module Equality
3
5
  def eql?(other)
4
6
  (self.name == other.name) && (self.uri == other.uri)
@@ -8,5 +10,10 @@ module Mangdown
8
10
  # puts "You may want to use :eql?"
9
11
  super
10
12
  end
13
+
14
+ # space ship operator for sorting
15
# Orders two objects by comparing their names.
def <=>(other)
  name <=> other.name
end
11
18
  end
12
19
  end
@@ -2,10 +2,8 @@ module Mangdown
2
2
  class MDHash
3
3
  include Equality
4
4
 
5
- attr_reader :properties
6
-
7
5
  def initialize(options = {})
8
- @properties = Properties.new(options[:site] || options[:uri])
6
+ @properties = Properties.new(options[:uri], options[:site])
9
7
 
10
8
  @hash = {}
11
9
  [:uri, :name].each {|key| @hash[key] = options.fetch(key)}
@@ -15,7 +13,7 @@ module Mangdown
15
13
 
16
14
  # explicit conversion to manga
17
15
  def to_manga
18
- if @properties.is_manga?(self)
16
+ if @properties.is_manga?
19
17
  Manga.new(name, uri)
20
18
  else
21
19
  raise NoMethodError, 'This is not a known manga type'
@@ -24,8 +22,8 @@ module Mangdown
24
22
 
25
23
  # explicit conversion to chapter
26
24
  def to_chapter
27
- if @properties.is_chapter?(self)
28
- @properties.chapter_klass.new(name, uri)
25
+ if @properties.is_chapter?
26
+ Chapter.new(name, uri)
29
27
  else
30
28
  raise NoMethodError, 'This is not a known chapter type'
31
29
  end
@@ -33,7 +31,7 @@ module Mangdown
33
31
 
34
32
  # explicit conversion to page
35
33
  def to_page
36
- if @properties.is_page?(self)
34
+ if @properties.is_page?
37
35
  Page.new(name, uri)
38
36
  else
39
37
  raise NoMethodError, 'This is not a known page type'
@@ -50,6 +48,16 @@ module Mangdown
50
48
  @hash[:uri]
51
49
  end
52
50
 
51
+ # name writer
52
# Stores +other+ under :name in the backing hash.
def name=(other)
  @hash.store(:name, other)
end
55
+
56
+ # uri writer
57
# Stores +other+ under :uri in the backing hash.
def uri=(other)
  @hash.store(:uri, other)
end
60
+
53
61
  def [](key)
54
62
  @hash[key]
55
63
  end