ttwatcher 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,66 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Sites
    module InternetConnection
      # TODO REFACTOR
      class Url # no-doc

        ##
        # Creates new Url instance.
        #
        # @param [String] url
        #   An url in any form. NOTE: it is mutated in place (see #encode_url).
        #
        # @param [Hash] params
        # @option params [Symbol] :force_scheme
        # @option params [Hash] :query_params
        # @option params [String] :encoding
        #
        # @return [InternetConnection::Url]

        def initialize(url, **params)
          # NOTE(review): including a module at instance-creation time mutates
          # the class itself for every instance; kept as-is because Scheme is
          # defined elsewhere and callers may rely on this lazy mix-in.
          self.class.include Scheme

          @url      = url
          @query    = params[:query_params] || {}
          @encoding = params[:encoding]     || 'utf-8'

          set_scheme params[:force_scheme]
          encode_url
        end

        private

        attr_reader :encoding # no-doc
        attr_reader :url      # no-doc

        ##
        # Merges the url's own query values with +@query+ and returns them
        # with every value converted to +encoding+.
        #
        # @return [Hash]

        def query_normalization
          uri   = Addressable::URI.parse url
          tmp_q = (uri.query_values || {}).merge(@query)

          # Fast path. The original guard used `&&` and compared `v.encode`
          # (an encoded COPY of the string) against the encoding *name*, so it
          # could only ever trigger for an empty hash. Compare Encoding
          # objects instead and skip work when nothing needs converting.
          target = Encoding.find encoding
          return tmp_q if tmp_q.empty? || tmp_q.values.all? { |v| v.encoding == target }

          tmp_q.each_key do |k|
            tmp_q[k] = tmp_q[k].encode(encoding, invalid: :replace,
                                                 undef:   :replace)
          end

          tmp_q
        end

        ##
        # Normalization. String to rfc-standard uri.
        # Mutates the caller-supplied +@url+ string in place via String#replace.

        def encode_url
          uri = Addressable::URI.parse url
          uri.query_values = query_normalization
          @url.replace uri.normalize.to_s.force_encoding(@encoding)
        end
      end # class Url
    end # module InternetConnection
  end # module Sites
end # module TTWatcher
@@ -0,0 +1,18 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Sites
    class Megashara < TorrentSite

      ##
      # Searches the tracker for +name+, forwarding it to the generic
      # search machinery as a +text+ query parameter.

      def find_torrent(name) # no-doc
        options = { url: { query_params: { text: name } } }
        super name, options
      end

      private

      ##
      # Builds the search url for +name+ on top of +domain_name+.

      def search_url(name = nil) # no-doc
        "#{domain_name}/search/#{name}"
      end
    end # class Megashara
  end # module Sites
end # module TTWatcher
@@ -0,0 +1,39 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class Base # no-doc

      # @return [Site]
      attr_reader :assigned_site

      # @return [Hash]
      attr_reader :settings

      ##
      # Creates Parser instance bound to +site+.
      #
      # @param [Site] site
      #
      # @return [Base]

      def initialize(site)
        @assigned_site = site
        @settings      = S[site.name][:parser_settings] || {}
      end

      ##
      # Abstract entry point; concrete parsers scan +page+ for torrents.
      #
      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, NilClass]
      #   TorrentList instance with torrents.
      #   When parser crashed due to unknown reason it returns +nil+.

      def parse(page)
        raise NotImplementedError, "Abstract method called!"
      end
    end # class Base
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,64 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class Megashara < SimpleParser
      private

      ##
      # Urls of the remaining result pages, memoized in +@links+.
      # Derived from the second-to-last cell of the pagination table.

      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        pagination_cell = structure.css('table[@class="pagination-table"]')
                                   .xpath('tr')
                                   .xpath('td')[-2]
        return @links = [] if pagination_cell.nil?

        last_page = pagination_cell.css('a').text.to_i - 1
        template  = pagination_cell.css('a').attr('href').to_s

        # Each page link only differs in its trailing page number.
        @links = (1..last_page).map { |page| template.gsub(/(\d+)$/, page.to_s) }
      end

      def torrents_unparsed # no-doc
        structure.css('table[@class="table-wide"]').css('table').css('tr')
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +megashara+ gives next information about single torrent
      #
      #  ++ hsh[:name]          ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]   ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]           ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]       ==> ex. :super_cool_tracker
      #  -- hsh[:author]        ==> ex. 'Bit kitty fun'
      #  -- hsh[:added_date]    ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]       ==> ex. 50042
      #  ++ hsh[:leeches]       ==> ex. 1
      #  ++ hsh[:size]          ==> ex. "20000 mb"
      #  ++ hsh[:magnet_url]    ==> ex. "magnet:?xt=urn....................."
      #  -- hsh[:download_url]  ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        cells = unparsed_data.css('td')
        links = cells.css('a')

        torrent_fields = {
          name:       cells[1].text,
          magnet_url: links[1].attr('href').to_s,
          url:        links.attr('href').to_s, # NodeSet#attr => first link's href
          size:       cells[3].text,
          seeders:    cells[4].text.to_i,
          leeches:    cells[5].text.to_i,
          tracker:    assigned_site.name
        }

        Torrent.new torrent_fields
      end
    end # class Megashara
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,67 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class Rutor < SimpleParser
      private

      ##
      # Urls of the remaining result pages, memoized in +@links+.

      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        page_anchors = rutor_structure.xpath('b').first.xpath('a')
        @links = page_anchors.map { |anchor| anchor.attribute('href').to_s }
      end

      def torrents_unparsed # no-doc
        rutor_structure.css('tr[@class="gai"], tr[@class="tum"]')
      end

      # Root node of the search-results markup.
      def rutor_structure # no-doc
        structure.xpath '//div[@id="index"]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +rutor+ gives next information about single torrent
      #
      #  ++ hsh[:name]          ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]   ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]           ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]       ==> ex. :super_cool_tracker
      #  -- hsh[:author]        ==> ex. 'Bit kitty fun'
      #  ++ hsh[:added_date]    ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]       ==> ex. 50042
      #  ++ hsh[:leeches]       ==> ex. 1
      #  ++ hsh[:size]          ==> ex. "20000 mb"
      #  ++ hsh[:magnet_url]    ==> ex. "magnet:?xt=urn....................."
      #  ++ hsh[:download_url]  ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        hsh      = {}
        anchors  = unparsed_data.css('a')
        centered = unparsed_data.css('td[@align="center"]').css('span')

        hsh[:short_link] = unparsed_data.css('a[@class="downgif"]').attribute('href').to_s
        hsh[:magnet_url] = anchors[1].attribute('href').to_s
        hsh[:url]        = anchors[2].attribute('href').to_s
        hsh[:name]       = anchors[2].text
        hsh[:added_date] = unparsed_data.css('td')[0].text
        hsh[:seeders]    = centered[0].text
        hsh[:leeches]    = centered[1].text

        # The size cell is missing on some rows; only set the field when present.
        size_cell = unparsed_data.css('td[@align="right"]')[1]
        hsh[:size] = size_cell.text if size_cell

        hsh[:tracker]      = assigned_site.name
        hsh[:download_url] = assigned_site.address hsh[:short_link]
        hsh[:url]          = assigned_site.address hsh[:url]

        Torrent.new hsh
      end
    end # class Rutor
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,134 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class SimpleParser < Parsers::Base # no-doc

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, nil]
      #   When any exception raised it returns nil
      #   Otherwise it returns TorrentList instance.

      def parse(page)
        return nil if page.nil?
        self.page = page
        torrents  = TorrentList.new
        loop do
          torrents << extract_torrents_from_page
          break unless next_page?
          continue_with_next_page
        end

        torrents
      # FIX: was `rescue Exception`, which also swallowed SignalException,
      # SystemExit and NoMemoryError. NotImplementedError (a ScriptError,
      # not a StandardError) is kept so an unimplemented abstract hook still
      # follows the documented "return nil on crash" contract.
      rescue StandardError, NotImplementedError => exception
        notificate_about_crash! exception

        nil
      end

      private

      # @return Array<String>
      #   List of urls that should be scanned before complete torrents search.

      def new_pages_list
        raise NotImplementedError, "Abstract method called!"
      end

      # @return Array<Nokogiri::Node>
      #   Each element from this array represents all available information
      #   about 1 torrent.

      def torrents_unparsed
        raise NotImplementedError, "Abstract method called!"
      end

      ##
      # Extracts single torrent from +unparsed_data+.
      #
      # @param [Nokogiri::Node] unparsed_data
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        raise NotImplementedError, "Abstract method called!"
      end

      # @return [Encoding] (Encoding::UTF_8)
      #   Parser encoding preferences.

      attr_reader :encoding

      # @param [Encoding, String] new_encoding

      def encoding=(new_encoding)
        @encoding =
          if new_encoding.is_a?(Encoding)
            new_encoding
          else
            Encoding.find new_encoding
          end
      end

      # @return [String]
      #   Current page.

      attr_reader :page

      # Stores +other_page+, coercing it to the parser's +encoding+ when needed.
      def page=(other_page)
        @page =
          if other_page.encoding.name == encoding.name
            other_page
          else
            other_page.force_encoding encoding
          end
      end

      ##
      # Structure for current +page+.
      #
      # @return [Nokogiri::HTML::Document]

      def structure
        Nokogiri::HTML page, nil, encoding.to_s
      end

      # @return [TorrentList]
      #   Returns extracted torrents from current +page+.

      def extract_torrents_from_page
        list = TorrentList.new
        torrents_unparsed.each do |unparsed_torrent|
          list << extract_torrent(unparsed_torrent)
        end

        list
      end

      # @return [TrueClass, FalseClass]
      #   Returns +true+ if some pages wasn't scanned, otherwise returns +false+.

      def next_page?
        !new_pages_list.empty?
      end

      # Pops the next pending url and downloads it as the new current +page+.
      def continue_with_next_page # no-doc
        url = new_pages_list.pop
        self.page = assigned_site.download_page url
      end

      def initialize(site) # no-doc
        super

        # `||` replaces the redundant `x ? x : default` ternary.
        self.encoding = settings[:encoding] || Encoding::UTF_8
      end

      # Logs the crash with a backtrace; parse/1 then returns nil to the caller.
      def notificate_about_crash!(exception)
        Logger.with_backtrace "Parser #{self.class} crashed with error:\n #{exception.inspect}"
      end
    end # class SimpleParser
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,62 @@
1
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class Unionpeer < SimpleParser
      private

      ##
      # Urls of the remaining result pages, memoized in +@links+.

      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        hrefs = structure.css('p[@class="small"]').css('a').map { |a| a.attr('href') }
        # NOTE(review): slice!(1..-2) keeps only the inner links — presumably
        # dropping prev/next controls; `|| []` covers the nil it returns
        # for too-short lists. Confirm against live markup.
        @links = hrefs.slice!(1..-2) || []
      end

      def torrents_unparsed # no-doc
        structure.css 'tr[class="tCenter hl-tr "]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +unionpeer+ gives next information about single torrent
      #
      #  ++ hsh[:name]          ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]   ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]           ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]       ==> ex. :super_cool_tracker
      #  ++ hsh[:author]        ==> ex. 'Bit kitty fun'
      #  ++ hsh[:added_date]    ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]       ==> ex. 50042
      #  ++ hsh[:leeches]       ==> ex. 1
      #  ++ hsh[:size]          ==> ex. "20000 mb"
      #  -- hsh[:magnet_url]    ==> ex. "magnet:?xt=urn....................."
      #  ++ hsh[:download_url]  ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        title_link    = unparsed_data.css('a[@class="genmed2 tLink"]')
        download_link = unparsed_data.css('a[@class="small tr-dl"]')

        hsh = {}
        hsh[:name]       = title_link.text
        hsh[:author]     = unparsed_data.css('td[@class=row1]')[2].text
        hsh[:size]       = download_link.text
        hsh[:added_date] = unparsed_data.css('td[@class="row4 small nowrap"]').css('p')[1].text
        hsh[:seeders]    = unparsed_data.css('td[@class="row4 seedmed bold"]').text.to_i
        hsh[:leeches]    = unparsed_data.css('td[@class="row4 leechmed"]').text.to_i

        hsh[:url]          = assigned_site.address(title_link.attr('href').to_s)
        hsh[:download_url] = assigned_site.address(download_link.attr('href').to_s)

        hsh[:tracker] = assigned_site.name

        Torrent.new(hsh)
      end
    end # class Unionpeer
  end # module Parsers
end # module TTWatcher