ttwatcher 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Sites
    module InternetConnection
      # TODO REFACTOR
      class Url # no-doc

        ##
        # Creates new Url instance.
        #
        # @param [String] url
        #   An url in any form.
        #
        # @param [Hash] params
        # @option params [Symbol] :force_scheme
        # @option params [Hash] :query_params
        # @option params [String] :encoding
        #
        # @return [InternetConnection::Url]

        def initialize(url, **params)
          self.class.include Scheme

          @url      = url
          @query    = params[:query_params] || {}
          @encoding = params[:encoding] || 'utf-8'

          set_scheme params[:force_scheme]
          encode_url
        end

        private

        attr_reader :encoding # no-doc
        attr_reader :url # no-doc

        ##
        # return +query+ with correct encoding

        def query_normalization
          uri   = Addressable::URI.parse url
          tmp_q = (uri.query_values || {}).merge(@query)

          # FIX: the original guard compared +v.encode+ (a freshly encoded
          # String) against the encoding *name* string, which can never be
          # equal, and joined with +&&+ so the early return only fired for
          # an empty query. Intended behavior: skip re-encoding when there
          # is no query OR every value is already in the target encoding.
          target = Encoding.find(encoding)
          return tmp_q if tmp_q.empty? || tmp_q.values.all? { |v| v.encoding == target }

          tmp_q.each_key do |k|
            tmp_q[k] = tmp_q[k].encode(encoding, invalid: :replace,
                                                 undef:   :replace)
          end

          return tmp_q
        end

        ##
        # Normalization. String to rfc-standard uri

        def encode_url
          uri = Addressable::URI.parse url
          uri.query_values = query_normalization
          # NOTE(review): +replace+ mutates the caller's string in place and
          # raises FrozenError for frozen input (this file enables
          # frozen_string_literal) — confirm callers never pass a literal.
          @url.replace uri.normalize.to_s.force_encoding(@encoding)
        end
      end # class Url
    end # module InternetConnection
  end # module Sites
end # module TTWatcher
@@ -0,0 +1,18 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Sites
    ##
    # Search adapter for the +megashara+ tracker.
    class Megashara < TorrentSite

      # Runs a torrent search, forwarding the query string as the +text+
      # url parameter. See TorrentSite#find_torrent.
      def find_torrent(name) # no-doc
        options = { url: { query_params: { text: name } } }
        super(name, options)
      end

      private

      # Builds the site search url for +name+ (empty query when nil).
      def search_url(name = nil) # no-doc
        "#{domain_name}/search/#{name}"
      end
    end # class Megashara
  end # module Sites
end # module TTWatcher
@@ -0,0 +1,39 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Abstract base class for site parsers; concrete parsers must
    # implement #parse.
    class Base # no-doc

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, NilClass]
      #   TorrentList instance with torrents.
      #   When parser crashed due to unknown reason it returns +nil+.

      def parse(page)
        raise NotImplementedError, "Abstract method called!"
      end

      # @return [Site]

      attr_reader :assigned_site

      # @return [Hash]

      attr_reader :settings

      ##
      # Creates Parser instance.
      #
      # @param [Site] site
      #
      # @return [Base]

      def initialize(site)
        @assigned_site = site

        # FIX: the original raised NoMethodError when the global settings
        # store had no entry for +site.name+ — guard the lookup so a
        # missing entry degrades to empty parser settings.
        site_settings = S[site.name] || {}
        @settings     = site_settings[:parser_settings] || {}
      end
    end # class Base
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,64 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Parser for +megashara+ search result pages.
    class Megashara < SimpleParser
      private

      # Collects the remaining pagination links once, memoizing the result
      # in +@links+ (empty when the page carries no pagination table).
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        pagination = structure.css('table[@class="pagination-table"]')
                              .xpath('tr')
                              .xpath('td')[-2]
        return @links = [] if pagination.nil?

        last_page = pagination.css('a').text.to_i - 1
        template  = pagination.css('a').attr('href').to_s

        # Derive each page url by swapping the trailing page number.
        @links = (1..last_page).map { |page| template.gsub(/(\d+)$/, page.to_s) }
      end

      # Raw table rows from the current page, one per torrent.
      def torrents_unparsed # no-doc
        structure.css('table[@class="table-wide"]').css('table').css('tr')
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +megashara+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  -- hsh[:author]       ==> ex. 'Bit kitty fun'
      #  -- hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  ++ hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  -- hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        cells   = unparsed_data.css('td')
        anchors = cells.css('a')

        attributes = {
          name:       cells[1].text,
          magnet_url: anchors[1].attr('href').to_s,
          url:        anchors.attr('href').to_s,
          size:       cells[3].text,
          seeders:    cells[4].text.to_i,
          leeches:    cells[5].text.to_i,
          tracker:    assigned_site.name
        }

        Torrent.new attributes
      end
    end # class Megashara
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,67 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Parser for +rutor+ search result pages.
    class Rutor < SimpleParser
      private

      # Collects pagination links once, memoizing the result in +@links+.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        @links = rutor_structure.xpath('b').first.xpath('a').map do |node|
          node.attribute('href').to_s
        end
      end

      # Raw table rows from the current page, one per torrent.
      def torrents_unparsed # no-doc
        rutor_structure.css('tr[@class="gai"], tr[@class="tum"]')
      end

      # Container node holding the result listing.
      def rutor_structure # no-doc
        structure.xpath '//div[@id="index"]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +rutor+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  -- hsh[:author]       ==> ex. 'Bit kitty fun'
      #  ++ hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  ++ hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  ++ hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        hsh = Hash.new

        hsh[:short_link] = unparsed_data.css('a[@class="downgif"]').attribute('href').to_s
        hsh[:magnet_url] = unparsed_data.css('a')[1].attribute('href').to_s
        hsh[:url]        = unparsed_data.css('a')[2].attribute('href').to_s
        hsh[:name]       = unparsed_data.css('a')[2].text
        hsh[:added_date] = unparsed_data.css('td')[0].text
        # FIX: coerce counters to Integer — the other parsers (Megashara,
        # Unionpeer) use +to_i+ here, and the doc examples above show
        # integer values; the original left these as Strings.
        hsh[:seeders]    = unparsed_data.css('td[@align="center"]').css('span')[0].text.to_i
        hsh[:leeches]    = unparsed_data.css('td[@align="center"]').css('span')[1].text.to_i

        if (tmp_size = unparsed_data.css('td[@align="right"]')[1])
          hsh[:size] = tmp_size.text
        end

        hsh[:tracker]      = assigned_site.name
        hsh[:download_url] = assigned_site.address(hsh[:short_link])
        hsh[:url]          = assigned_site.address(hsh[:url])

        Torrent.new hsh
      end
    end # class Rutor
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,134 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Page-by-page parsing skeleton shared by the concrete site parsers.
    # Subclasses implement #new_pages_list, #torrents_unparsed and
    # #extract_torrent.
    class SimpleParser < Parsers::Base # no-doc

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, nil]
      #   When any exception raised it returns nil
      #   Otherwise it returns TorrentList instance.

      def parse(page)
        return nil if page.nil?
        self.page = page
        torrents = TorrentList.new
        loop do
          torrents << extract_torrents_from_page
          next_page? ? continue_with_next_page : break
        end

        return torrents
      rescue StandardError => exception
        # FIX: rescue StandardError instead of Exception so that signals,
        # SystemExit and fatal VM errors are not swallowed by the parser.
        notificate_about_crash! exception

        return nil
      end

      private

      # @return Array<String>
      #   List of urls that should be scanned before complete torrents search.

      def new_pages_list
        raise NotImplementedError, "Abstract method called!"
      end

      # @return Array<Nokogiri::Node>
      #   Each element from this array represents all available information
      #   about 1 torrent.

      def torrents_unparsed
        raise NotImplementedError, "Abstract method called!"
      end

      ##
      # Extracts single torrent from +unparsed_data+.
      #
      # @param [Nokogiri::Node] unparsed_data
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        raise NotImplementedError, "Abstract method called!"
      end

      # @return [Encoding] (Encoding::UTF_8)
      #   Parser encoding preferences.

      attr_reader :encoding

      # @param [Encoding, String] new_encoding

      def encoding=(new_encoding)
        @encoding =
          if new_encoding.is_a?(Encoding)
            new_encoding
          else
            Encoding.find new_encoding
          end
      end

      # @return [String]
      #   Current page.

      attr_reader :page

      # Stores +other_page+, forcing it into the parser's encoding when
      # the encodings differ.
      def page=(other_page)
        @page =
          if other_page.encoding.name == encoding.name
            other_page
          else
            other_page.force_encoding encoding
          end
      end

      ##
      # Structure for current +page+.
      #
      # @return [Nokogiri::HTML::Document]

      def structure
        Nokogiri::HTML page, nil, encoding.to_s
      end

      # @return [TorrentList]
      #   Returns extracted torrents from current +page+.

      def extract_torrents_from_page
        list = TorrentList.new
        torrents_unparsed.each do |unparsed_torrent|
          torrent = extract_torrent unparsed_torrent
          list << torrent
        end

        return list
      end

      # @return [TrueClass, FalseClass]
      #   Returns +true+ if some pages wasn't scanned, otherwise returns +false+.

      def next_page?
        new_pages_list.count > 0
      end

      # Pops the next pending url and loads it as the current page.
      def continue_with_next_page # no-doc
        url = new_pages_list.pop
        self.page = assigned_site.download_page url
      end

      def initialize(site) # no-doc
        super

        self.encoding = settings[:encoding] || Encoding::UTF_8
      end

      # Logs the crash with a backtrace; parsing errors never propagate.
      def notificate_about_crash!(exception)
        Logger.with_backtrace "Parser #{self.class} crashed with error:\n #{exception.inspect}"
      end
    end # class SimpleParser
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,62 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    ##
    # Parser for +unionpeer+ search result pages.
    class Unionpeer < SimpleParser
      private

      # Collects pagination links once (dropping the first and last
      # service entries), memoizing the result in +@links+.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        hrefs  = structure.css('p[@class="small"]').css('a').map { |node| node.attr('href') }
        @links = hrefs[1..-2] || []
      end

      # Raw table rows from the current page, one per torrent.
      def torrents_unparsed # no-doc
        structure.css 'tr[class="tCenter hl-tr "]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +unionpeer+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  ++ hsh[:author]       ==> ex. 'Bit kitty fun'
      #  ++ hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  -- hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  ++ hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        name_node     = unparsed_data.css('a[@class="genmed2 tLink"]')
        download_node = unparsed_data.css('a[@class="small tr-dl"]')

        hsh = {
          name:       name_node.text,
          author:     unparsed_data.css('td[@class=row1]')[2].text,
          size:       download_node.text,
          added_date: unparsed_data.css('td[@class="row4 small nowrap"]').css('p')[1].text,
          seeders:    unparsed_data.css('td[@class="row4 seedmed bold"]').text.to_i,
          leeches:    unparsed_data.css('td[@class="row4 leechmed"]').text.to_i
        }

        hsh[:url]          = assigned_site.address(name_node.attr('href').to_s)
        hsh[:download_url] = assigned_site.address(download_node.attr('href').to_s)
        hsh[:tracker]      = assigned_site.name

        return Torrent.new(hsh)
      end
    end # class Unionpeer
  end # module Parsers
end # module TTWatcher
+ end # module TTWatcher