yamd 0.0.2 → 0.0.3

Files changed (5)
  1. checksums.yaml +4 -4
  2. data/bin/yamd +3 -0
  3. data/lib/yamd.rb +51 -5
  4. data/lib/yamd/fakku.rb +47 -0
  5. metadata +54 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 37713855cb84a57329e8ff6cc81a41221d5658a7
- data.tar.gz: c6daab7709db44037bbcb4071733871948ee5ae7
+ metadata.gz: a7cfa4dadcceadf400f49b0d0fc2e353b9a474a7
+ data.tar.gz: ae7c9eb249ca40a34515832316ca20069addd234
  SHA512:
- metadata.gz: 1c6a49b2ee093ccf8f08b63ee41962ef7344d8b2cbaa129462e6033c3e2ac2c27f816138fac68f64fe8761b4d9e02cae821bdfca365d242ff8b1199a38ca0691
- data.tar.gz: 6a8adc74ffa5268c582709bea2438de86208628bff54132a81de8e396402c717df05d60af7b583b675c349286b0eedbca14d59fefc8c6f6a718a2fbe78a9ec7f
+ metadata.gz: 13e0fd6911898fe1eed2b82ef27e55bdda1cbc4df5020694bdbf5236145a5490a1d518e0bb4a1716977644bfcb8e3ce4774bcc6257a76459a8d08a96b1b26a3b
+ data.tar.gz: b9b3c932f1313b3d6909c88cb292a3be96f3d22b3246c8dc190853152b9897845ecc4621e12d46d2180f1af709c854d4743c17c1a60dfd33b093a7bc38f09725
data/bin/yamd CHANGED
@@ -2,6 +2,7 @@
 
  require 'yamd/mangahere'
  require 'yamd/mangafox'
+ require 'yamd/fakku'
 
  unless ARGV.size > 0
  puts 'USAGE: yamd <manga main page url>'
@@ -15,6 +16,8 @@ if /mangafox/.match(manga_main_page_url)
  manga = MangafoxCrawler.new(manga_main_page_url)
  elsif /mangahere/.match(manga_main_page_url)
  manga = MangahereCrawler.new(manga_main_page_url)
+ elsif /fakku/.match(manga_main_page_url)
+ manga = FakkuCrawler.new(manga_main_page_url)
  else
  puts "The argument (#{manga_main_page_url}) doesn't seem to be a URL of one of the supported sites."
  end
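
The executable change above only adds a third branch to the URL-based dispatch. A minimal sketch of the same selection logic, assuming the gem's crawler classes are loaded; the crawler_for helper and the URL are illustrative, not part of the gem:

    require 'yamd/mangafox'
    require 'yamd/mangahere'
    require 'yamd/fakku'

    # Mirror bin/yamd's if/elsif chain: pick a crawler class from the URL.
    def crawler_for(url)
      case url
      when /mangafox/  then MangafoxCrawler
      when /mangahere/ then MangahereCrawler
      when /fakku/     then FakkuCrawler
      end
    end

    url = 'https://www.fakku.net/hentai/some-title'   # illustrative URL
    crawler_class = crawler_for(url)
    manga = crawler_class.new(url) if crawler_class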
data/lib/yamd.rb CHANGED
@@ -3,6 +3,23 @@ require 'open-uri'
  require 'addressable/uri'
  require 'pathname'
 
+ require 'capybara'
+ require 'capybara/poltergeist'
+
+ Capybara.register_driver(:poltergeist) do | app |
+ Capybara::Poltergeist::Driver.new(app, js_errors: false)
+ end
+
+ Capybara.default_driver = :poltergeist
+ Capybara.run_server = false
+ $internet = Capybara.current_session
+
+ def my_open(url)
+ $internet.visit url
+
+ $internet.html
+ end
+
  class PageCrawler
  attr_reader :custom_data, :url, :parsed_html, :number, :chapter
 
@@ -42,7 +59,7 @@ class ChapterCrawler
  Enumerator.new do | yielder |
  number = 1
  pages_info.each do | page_info |
- parsed_html = Nokogiri::HTML(open(page_info[:url]))
+ parsed_html = Nokogiri::HTML(my_open(page_info[:url]))
  yielder.yield self.class.page_class.new(page_info, parsed_html, number, self)
  number += 1
  end
@@ -70,7 +87,7 @@ class MangaCrawler
  Enumerator.new do | yielder |
  number = 1
  chapters_info.each do | chapter_info |
- page = Nokogiri::HTML(open(chapter_info[:url]))
+ page = Nokogiri::HTML(my_open(chapter_info[:url]))
  yielder.yield self.class.chapter_class.new(chapter_info, page, number, self)
  number += 1
  end
@@ -91,22 +108,43 @@ class ImageDownloader
  @base_dir = base_dir
  end
 
+ # TODO: Many, many things:
+ # * Add a hash parameter with the parallelization options.
+ # * What parallelization options should exist? Parallelize chapters? Parallelize pages independently of chapters? Chapters within a sliding window? Pages within a sliding window?
+ # * Add the retryable gem to all the IO actions, INCLUDING THE ABSTRACT CLASSES ABOVE.
+ # * Avoid an error with one page or chapter stopping the whole download. Log the work of the algorithm and all the failures in a file inside the manga directory, so the user can review the problems easily.
+ # * Good and bad points of the parallelization options:
+ # * Chapter - start a thread for each chapter; download the pages of each chapter sequentially.
+ # * Good: The easiest to implement. For the average manga the granularity is good: between 10~100 threads of 19~45 pages each.
+ # * Bad: If things go bad, they go BAD. It's possible that every chapter ends up with missing pages; in that case the best option is to remove everything and start over. Doesn't work for unending shounens (One Piece, in truth ~800 pieces of 19 pages each).
+ # * Chapters within a window - start N threads and put them in a queue; wait for the first to finish, then add a new chapter at the end of the queue and wait again (a rough sketch of this option follows this file's diff).
+ # * Good: Not very complex to implement. If things go bad we remove only the last N chapters, not everything; with a log we may need to remove even fewer. Works for unending shounens.
+ # * Bad: Adds a variable to be hardcoded or received. More complex than simply parallelizing every chapter.
+ # * Pages (or Chapters and Pages) - start a thread for every chapter, then a thread for every page of the chapter. Simple, isn't it?
+ # * Good: If no other option has eaten all of your bandwidth, this one will either freeze your computer or give you the best result. Easy to implement.
+ # * Bad: Have you ever seen chaos? This is it. If something fails and you haven't checked the log, you will discover missing pages in the middle of chapters. The granularity is also bad: there's a lot of thread-creation overhead for little work. It will also almost surely freeze your computer if the manga is big and your bandwidth and processing power are small.
+ # * Pages within a window - the same as chapters within a window, but with pages instead of chapters.
+ # * Good: Not very complex to implement. Works well for mangas where the uploader has compressed an entire volume into a single chapter and there's only one volume. If things go bad, you only need to delete the last N pages; if N is 40, for example, and it's a shounen with no fewer than 19 pages per chapter, delete the last 3 chapters.
+ # * Bad: Bad granularity. Not so bad if your bandwidth is small, but it will probably cost a lot of CPU for little benefit.
  def download(manga)
- manga_dir = Pathname.new(@base_dir).join(manga.name + '/')
+ manga_name = self.class.sanitize_dir_name(manga.name)
+ manga_dir = Pathname.new(@base_dir).join(manga_name + '/')
  if manga_dir.exist?
  p 'Manga dir exists. Skipping each existing chapter. If the script was forced to stop the last downloaded chapter can be incomplete. Remove it to be downloaded again.'
  else
  Dir.mkdir(manga_dir.to_s)
  end
  manga.chapters.each do | chapter |
- chapter_dir = manga_dir.join(chapter.name + '/')
+ chapter_name = self.class.sanitize_dir_name(chapter.name)
+ chapter_dir = manga_dir.join(chapter_name + '/')
  unless chapter_dir.exist?
  Dir.mkdir(chapter_dir.to_s)
  chapter.pages.each do | page |
  page_name = self.class.format_page_name(page, chapter, manga)
  page_abs_path = chapter_dir.join(page_name).to_s
  File.open(page_abs_path, 'wb') do | f |
- open(page.image_url, 'rb') do | image |
+ safe_uri = URI.encode(page.image_url, '[]')
+ open(safe_uri, 'rb') do | image |
  f.write(image.read)
  end
  end
@@ -120,5 +158,13 @@ class ImageDownloader
  page_path = Addressable::URI.parse(page.image_url).path
  format("%04d", page.number) + File.extname(page_path)
  end
+
+ # TODO: check if all text from the site that's used to make dirs or
+ # files is sanitized
+ # thanks to "Ranma 1/2"
+ def self.sanitize_dir_name(name)
+ # TODO: this is a hack, find a serious solution for every possible case
+ name.gsub(/\//, '_')
+ end
  end
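
The switch from open to my_open routes every page fetch through Capybara with the Poltergeist (PhantomJS) driver, presumably so pages that build their content with JavaScript still hand usable HTML to Nokogiri. Separately, the "Chapters within a window" option from the TODO above could look roughly like the sketch below; download_chapter is a hypothetical helper standing in for the per-chapter body of ImageDownloader#download, and the default window size of 4 is arbitrary:

    # Keep at most `window` chapter downloads in flight; when the head of the
    # queue finishes, start the next chapter, as described in the TODO comment.
    def download_chapters_windowed(chapters, window = 4)
      pending = chapters.to_a
      running = []
      until pending.empty? && running.empty?
        # Top the window up with new chapter threads.
        while running.size < window && !pending.empty?
          running << Thread.new(pending.shift) { |chapter| download_chapter(chapter) }
        end
        # Wait for the oldest thread to finish, then refill the window.
        running.shift.join
      end
    end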
 
data/lib/yamd/fakku.rb ADDED
@@ -0,0 +1,47 @@
+ require 'yamd'
+ require 'addressable/uri'
+
+ class FakkuPage < PageCrawler
+ def image_url
+ @parsed_html.at_css('img.current-page')['src']
+ end
+ end
+
+ class FakkuChapter < ChapterCrawler
+ def self.page_class
+ FakkuPage
+ end
+
+ def pages_info
+ # there's no need for a lazy enumerator here, no IO action is taken
+ page_options = @parsed_html.at_css('div#content select.drop').css('option')
+ pages_number = page_options.map { | option | option['value'].to_i }.max
+
+ page_urls = (1..pages_number).to_a.map do | i |
+ { url: self.url + "#page=#{i}" }
+ end
+
+ page_urls
+ end
+
+ def name
+ @custom_data[:name]
+ end
+ end
+
+ class FakkuCrawler < MangaCrawler
+ def chapters_info
+ url = URI.join(self.url, @parsed_html.at_css('a.button.green')['href'])
+ [{ name: 'OnlyChapter',
+ url: url }]
+ end
+
+ def self.chapter_class
+ FakkuChapter
+ end
+
+ def name
+ @parsed_html.at_css('div.content-name h1').text
+ end
+ end
+
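
Wiring the new crawler to the downloader from lib/yamd.rb looks roughly like this. The URL and target directory are illustrative, and ImageDownloader is assumed to take the base directory in its constructor, as the @base_dir assignment in lib/yamd.rb suggests:

    require 'yamd/fakku'   # also pulls in 'yamd' itself

    manga = FakkuCrawler.new('https://www.fakku.net/hentai/some-title')  # illustrative URL
    downloader = ImageDownloader.new('/tmp/manga/')                      # illustrative base dir
    downloader.download(manga)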
metadata CHANGED
@@ -1,43 +1,85 @@
  --- !ruby/object:Gem::Specification
  name: yamd
  version: !ruby/object:Gem::Version
- version: 0.0.2
+ version: 0.0.3
  platform: ruby
  authors:
  - Henrique Becker
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-12-17 00:00:00.000000000 Z
+ date: 2015-12-25 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: nokogiri
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '1.5'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '1.5'
  - !ruby/object:Gem::Dependency
  name: addressable
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '2.3'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '2.3'
+ - !ruby/object:Gem::Dependency
+ name: capybara
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '2.5'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '2.5'
+ - !ruby/object:Gem::Dependency
+ name: poltergeist
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '1.8'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '1.8'
+ - !ruby/object:Gem::Dependency
+ name: phantomjs
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '1.9'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '1.9'
  description: 'This gem offers: classes to subclass and create a manga site crawler;
  a dowloader to use with these classes; some site-specific scripts.'
  email: henriquebecker91@gmail.com
@@ -46,10 +88,11 @@ executables:
  extensions: []
  extra_rdoc_files: []
  files:
+ - bin/yamd
+ - lib/yamd.rb
+ - lib/yamd/fakku.rb
  - lib/yamd/mangafox.rb
  - lib/yamd/mangahere.rb
- - lib/yamd.rb
- - bin/yamd
  homepage: http://rubygems.org/gems/yamd
  licenses:
  - Public domain
@@ -60,20 +103,19 @@ require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.0.3
+ rubygems_version: 2.4.5.1
  signing_key:
  specification_version: 4
  summary: YAMD (Yet Another Manga Downloader) - A lazy interface for writting manga
  downloaders
  test_files: []
- has_rdoc: true
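
The dependency entries above correspond to gemspec declarations roughly like the following sketch. The actual yamd.gemspec is not part of this diff; only the gem names and version constraints are taken from the metadata:

    Gem::Specification.new do |s|
      s.name    = 'yamd'
      s.version = '0.0.3'
      # Runtime dependencies listed in the metadata above.
      s.add_runtime_dependency 'nokogiri',    '~> 1.5'
      s.add_runtime_dependency 'addressable', '~> 2.3'
      s.add_runtime_dependency 'capybara',    '~> 2.5'
      s.add_runtime_dependency 'poltergeist', '~> 1.8'
      s.add_runtime_dependency 'phantomjs',   '~> 1.9'
    end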