ttwatcher 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Sites
    module InternetConnection
      # TODO REFACTOR
      class Url # no-doc

        ##
        # Creates new Url instance.
        #
        # @param [String] url
        #   An url in any form.
        #
        # @param [Hash] params
        # @option params [Symbol] :force_scheme
        # @option params [Hash] :query_params
        # @option params [String] :encoding
        #
        # @return [InternetConnection::Url]

        def initialize(url, **params)
          self.class.include Scheme

          @url      = url
          @query    = params[:query_params] || {}
          @encoding = params[:encoding] || 'utf-8'

          set_scheme params[:force_scheme]
          encode_url
        end

        private

        attr_reader :encoding # no-doc
        attr_reader :url # no-doc

        ##
        # return +query+ with correct encoding

        def query_normalization
          uri   = Addressable::URI.parse url
          tmp_q = (uri.query_values || {}).merge(@query)

          # FIX: the original guard compared +v.encode+ (a freshly encoded
          # String) against the encoding *name* string, which can never be
          # equal, and joined with +&&+ so the early return only fired for
          # an empty query. Intended behavior: skip re-encoding when there
          # is no query OR every value is already in the target encoding.
          target = Encoding.find(encoding)
          return tmp_q if tmp_q.empty? || tmp_q.values.all? { |v| v.encoding == target }

          tmp_q.each_key do |k|
            tmp_q[k] = tmp_q[k].encode(encoding, invalid: :replace,
                                                 undef:   :replace)
          end

          return tmp_q
        end

        ##
        # Normalization. String to rfc-standard uri

        def encode_url
          uri = Addressable::URI.parse url
          uri.query_values = query_normalization
          # NOTE(review): +replace+ mutates the caller's string in place and
          # raises FrozenError for frozen input (this file enables
          # frozen_string_literal) — confirm callers never pass a literal.
          @url.replace uri.normalize.to_s.force_encoding(@encoding)
        end
      end # class Url
    end # module InternetConnection
  end # module Sites
end # module TTWatcher
@@ -0,0 +1,18 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Sites
    ##
    # Search adapter for the +megashara+ tracker.
    class Megashara < TorrentSite

      # Runs a torrent search, forwarding the query string as the +text+
      # url parameter. See TorrentSite#find_torrent.
      def find_torrent(name) # no-doc
        options = { url: { query_params: { text: name } } }
        super(name, options)
      end

      private

      # Builds the site search url for +name+ (empty query when nil).
      def search_url(name = nil) # no-doc
        "#{domain_name}/search/#{name}"
      end
    end # class Megashara
  end # module Sites
end # module TTWatcher
@@ -0,0 +1,39 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Abstract base class for site parsers; concrete parsers must
    # implement #parse.
    class Base # no-doc

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, NilClass]
      #   TorrentList instance with torrents.
      #   When parser crashed due to unknown reason it returns +nil+.

      def parse(page)
        raise NotImplementedError, "Abstract method called!"
      end

      # @return [Site]

      attr_reader :assigned_site

      # @return [Hash]

      attr_reader :settings

      ##
      # Creates Parser instance.
      #
      # @param [Site] site
      #
      # @return [Base]

      def initialize(site)
        @assigned_site = site

        # FIX: the original raised NoMethodError when the global settings
        # store had no entry for +site.name+ — guard the lookup so a
        # missing entry degrades to empty parser settings.
        site_settings = S[site.name] || {}
        @settings     = site_settings[:parser_settings] || {}
      end
    end # class Base
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,64 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Parser for +megashara+ search result pages.
    class Megashara < SimpleParser
      private

      # Collects the remaining pagination links once, memoizing the result
      # in +@links+ (empty when the page carries no pagination table).
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        pagination = structure.css('table[@class="pagination-table"]')
                              .xpath('tr')
                              .xpath('td')[-2]
        return @links = [] if pagination.nil?

        last_page = pagination.css('a').text.to_i - 1
        template  = pagination.css('a').attr('href').to_s

        # Derive each page url by swapping the trailing page number.
        @links = (1..last_page).map { |page| template.gsub(/(\d+)$/, page.to_s) }
      end

      # Raw table rows from the current page, one per torrent.
      def torrents_unparsed # no-doc
        structure.css('table[@class="table-wide"]').css('table').css('tr')
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +megashara+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  -- hsh[:author]       ==> ex. 'Bit kitty fun'
      #  -- hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  ++ hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  -- hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        cells   = unparsed_data.css('td')
        anchors = cells.css('a')

        attributes = {
          name:       cells[1].text,
          magnet_url: anchors[1].attr('href').to_s,
          url:        anchors.attr('href').to_s,
          size:       cells[3].text,
          seeders:    cells[4].text.to_i,
          leeches:    cells[5].text.to_i,
          tracker:    assigned_site.name
        }

        Torrent.new attributes
      end
    end # class Megashara
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,67 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Parser for +rutor+ search result pages.
    class Rutor < SimpleParser
      private

      # Collects pagination links once, memoizing the result in +@links+.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        @links = rutor_structure.xpath('b').first.xpath('a').map do |node|
          node.attribute('href').to_s
        end
      end

      # Raw table rows from the current page, one per torrent.
      def torrents_unparsed # no-doc
        rutor_structure.css('tr[@class="gai"], tr[@class="tum"]')
      end

      # Container node holding the result listing.
      def rutor_structure # no-doc
        structure.xpath '//div[@id="index"]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +rutor+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  -- hsh[:author]       ==> ex. 'Bit kitty fun'
      #  ++ hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  ++ hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  ++ hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        hsh = Hash.new

        hsh[:short_link] = unparsed_data.css('a[@class="downgif"]').attribute('href').to_s
        hsh[:magnet_url] = unparsed_data.css('a')[1].attribute('href').to_s
        hsh[:url]        = unparsed_data.css('a')[2].attribute('href').to_s
        hsh[:name]       = unparsed_data.css('a')[2].text
        hsh[:added_date] = unparsed_data.css('td')[0].text
        # FIX: coerce counters to Integer — the other parsers (Megashara,
        # Unionpeer) use +to_i+ here, and the doc examples above show
        # integer values; the original left these as Strings.
        hsh[:seeders]    = unparsed_data.css('td[@align="center"]').css('span')[0].text.to_i
        hsh[:leeches]    = unparsed_data.css('td[@align="center"]').css('span')[1].text.to_i

        if (tmp_size = unparsed_data.css('td[@align="right"]')[1])
          hsh[:size] = tmp_size.text
        end

        hsh[:tracker]      = assigned_site.name
        hsh[:download_url] = assigned_site.address(hsh[:short_link])
        hsh[:url]          = assigned_site.address(hsh[:url])

        Torrent.new hsh
      end
    end # class Rutor
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,134 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Page-by-page parsing skeleton shared by the concrete site parsers.
    # Subclasses implement #new_pages_list, #torrents_unparsed and
    # #extract_torrent.
    class SimpleParser < Parsers::Base # no-doc

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, nil]
      #   When any exception raised it returns nil
      #   Otherwise it returns TorrentList instance.

      def parse(page)
        return nil if page.nil?
        self.page = page
        torrents = TorrentList.new
        loop do
          torrents << extract_torrents_from_page
          next_page? ? continue_with_next_page : break
        end

        return torrents
      rescue StandardError => exception
        # FIX: rescue StandardError instead of Exception so that signals,
        # SystemExit and fatal VM errors are not swallowed by the parser.
        notificate_about_crash! exception

        return nil
      end

      private

      # @return Array<String>
      #   List of urls that should be scanned before complete torrents search.

      def new_pages_list
        raise NotImplementedError, "Abstract method called!"
      end

      # @return Array<Nokogiri::Node>
      #   Each element from this array represents all available information
      #   about 1 torrent.

      def torrents_unparsed
        raise NotImplementedError, "Abstract method called!"
      end

      ##
      # Extracts single torrent from +unparsed_data+.
      #
      # @param [Nokogiri::Node] unparsed_data
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        raise NotImplementedError, "Abstract method called!"
      end

      # @return [Encoding] (Encoding::UTF_8)
      #   Parser encoding preferences.

      attr_reader :encoding

      # @param [Encoding, String] new_encoding

      def encoding=(new_encoding)
        @encoding =
          if new_encoding.is_a?(Encoding)
            new_encoding
          else
            Encoding.find new_encoding
          end
      end

      # @return [String]
      #   Current page.

      attr_reader :page

      # Stores +other_page+, forcing it into the parser's encoding when
      # the encodings differ.
      def page=(other_page)
        @page =
          if other_page.encoding.name == encoding.name
            other_page
          else
            other_page.force_encoding encoding
          end
      end

      ##
      # Structure for current +page+.
      #
      # @return [Nokogiri::HTML::Document]

      def structure
        Nokogiri::HTML page, nil, encoding.to_s
      end

      # @return [TorrentList]
      #   Returns extracted torrents from current +page+.

      def extract_torrents_from_page
        list = TorrentList.new
        torrents_unparsed.each do |unparsed_torrent|
          torrent = extract_torrent unparsed_torrent
          list << torrent
        end

        return list
      end

      # @return [TrueClass, FalseClass]
      #   Returns +true+ if some pages wasn't scanned, otherwise returns +false+.

      def next_page?
        new_pages_list.count > 0
      end

      # Pops the next pending url and loads it as the current page.
      def continue_with_next_page # no-doc
        url = new_pages_list.pop
        self.page = assigned_site.download_page url
      end

      def initialize(site) # no-doc
        super

        self.encoding = settings[:encoding] || Encoding::UTF_8
      end

      # Logs the crash with a backtrace; parsing errors never propagate.
      def notificate_about_crash!(exception)
        Logger.with_backtrace "Parser #{self.class} crashed with error:\n #{exception.inspect}"
      end
    end # class SimpleParser
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,62 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Parser for +unionpeer+ search result pages.
    class Unionpeer < SimpleParser
      private

      # Collects pagination links once (dropping the first and last
      # service entries), memoizing the result in +@links+.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        hrefs  = structure.css('p[@class="small"]').css('a').map { |node| node.attr('href') }
        @links = hrefs[1..-2] || []
      end

      # Raw table rows from the current page, one per torrent.
      def torrents_unparsed # no-doc
        structure.css 'tr[class="tCenter hl-tr "]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +unionpeer+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  ++ hsh[:author]       ==> ex. 'Bit kitty fun'
      #  ++ hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  -- hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  ++ hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        name_node     = unparsed_data.css('a[@class="genmed2 tLink"]')
        download_node = unparsed_data.css('a[@class="small tr-dl"]')

        hsh = {
          name:       name_node.text,
          author:     unparsed_data.css('td[@class=row1]')[2].text,
          size:       download_node.text,
          added_date: unparsed_data.css('td[@class="row4 small nowrap"]').css('p')[1].text,
          seeders:    unparsed_data.css('td[@class="row4 seedmed bold"]').text.to_i,
          leeches:    unparsed_data.css('td[@class="row4 leechmed"]').text.to_i
        }

        hsh[:url]          = assigned_site.address(name_node.attr('href').to_s)
        hsh[:download_url] = assigned_site.address(download_node.attr('href').to_s)
        hsh[:tracker]      = assigned_site.name

        return Torrent.new(hsh)
      end
    end # class Unionpeer
  end # module Parsers
end # module TTWatcher
+ end # module TTWatcher