ttwatcher 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +20 -0
- data/LICENSE +9 -0
- data/sources/ttwatcher.rb +70 -0
- data/sources/ttwatcher/helpers.rb +24 -0
- data/sources/ttwatcher/logger.rb +40 -0
- data/sources/ttwatcher/project_structure.rb +50 -0
- data/sources/ttwatcher/sites.rb +68 -0
- data/sources/ttwatcher/sites/config.rb +20 -0
- data/sources/ttwatcher/sites/config.yml +29 -0
- data/sources/ttwatcher/sites/connection.rb +82 -0
- data/sources/ttwatcher/sites/connection/scheme.rb +79 -0
- data/sources/ttwatcher/sites/connection/url.rb +66 -0
- data/sources/ttwatcher/sites/megashara.rb +18 -0
- data/sources/ttwatcher/sites/parsers/abstract_parser.rb +39 -0
- data/sources/ttwatcher/sites/parsers/megashara_parser.rb +64 -0
- data/sources/ttwatcher/sites/parsers/rutor_parser.rb +67 -0
- data/sources/ttwatcher/sites/parsers/simple_parser.rb +134 -0
- data/sources/ttwatcher/sites/parsers/unionpeer_parser.rb +62 -0
- data/sources/ttwatcher/sites/parsers/zooqle_parser.rb +80 -0
- data/sources/ttwatcher/sites/rutor.rb +13 -0
- data/sources/ttwatcher/sites/site.rb +92 -0
- data/sources/ttwatcher/sites/torrent_site.rb +70 -0
- data/sources/ttwatcher/sites/unionpeer.rb +18 -0
- data/sources/ttwatcher/sites/zooqle.rb +18 -0
- data/sources/ttwatcher/torrent.rb +87 -0
- data/sources/ttwatcher/torrent_agent.rb +34 -0
- data/sources/ttwatcher/torrent_list.rb +51 -0
- data/spec/sources/ttwatcher/sites_spec.rb +8 -0
- data/spec/sources/ttwatcher/torrent_list_spec.rb +46 -0
- data/spec/spec_helper.rb +20 -0
- metadata +156 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Sites
    module InternetConnection
      # TODO REFACTOR
      class Url # no-doc

        ##
        # Creates new Url instance.
        #
        # @param [String] url
        #   An url in any form. NOTE: +encode_url+ normalizes this string
        #   in place via String#replace, so callers must pass a mutable
        #   (non-frozen) string.
        #
        # @param [Hash] params
        # @option params [Symbol] :force_scheme
        # @option params [Hash] :query_params
        # @option params [String] :encoding
        #
        # @return [InternetConnection::Url]

        def initialize(url, **params)
          # NOTE(review): re-including Scheme on every instantiation mutates
          # the class; harmless after the first call, but it belongs at the
          # class level.
          self.class.include Scheme

          @url      = url
          @query    = params[:query_params] || {}
          @encoding = params[:encoding] || 'utf-8'

          set_scheme params[:force_scheme]
          encode_url
        end

        private

        attr_reader :encoding # no-doc
        attr_reader :url      # no-doc

        ##
        # return +query+ with correct encoding
        #
        # Merges the query params already present in +url+ with the extra
        # +:query_params+ passed to the constructor, then re-encodes every
        # value into +encoding+.

        def query_normalization
          uri   = Addressable::URI.parse url
          tmp_q = (uri.query_values || {}).merge(@query)

          # Fixed guard: the original compared +v.encode+ (a String) with the
          # encoding *name*, which never matched, and joined the checks with
          # +&&+, so the early return effectively only fired for an empty
          # hash. Compare Encoding objects and use +||+ instead.
          return tmp_q if tmp_q.empty? ||
                          tmp_q.values.all? { |v| v.encoding == Encoding.find(encoding) }

          tmp_q.each_key do |k|
            tmp_q[k] = tmp_q[k].encode(encoding, invalid: :replace,
                                                 undef:   :replace)
          end

          return tmp_q
        end

        ##
        # Normalization. String to rfc-standard uri
        #
        # Rewrites +@url+ in place with the rfc-normalized form and the
        # normalized query string attached.

        def encode_url
          uri = Addressable::URI.parse url
          uri.query_values = query_normalization
          @url.replace uri.normalize.to_s.force_encoding(@encoding)
        end
      end # class Url
    end # module InternetConnection
  end # module Sites
end # module TTWatcher
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Sites
    class Megashara < TorrentSite

      # Delegates to TorrentSite#find_torrent, forwarding +name+ as the
      # +text+ query parameter used by the site's search endpoint.
      def find_torrent(name) # no-doc
        options = { url: { query_params: { text: name } } }
        super(name, options)
      end

      private

      # Search endpoint for the given torrent +name+ on the site's domain.
      def search_url(name = nil) # no-doc
        "#{domain_name}/search/#{name}"
      end
    end # class Megashara
  end # module Sites
end # module TTWatcher
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class Base # no-doc

      # @return [Site] the site this parser was created for.
      attr_reader :assigned_site

      # @return [Hash] per-site parser settings (empty hash when the
      #   config has none for this site).
      attr_reader :settings

      ##
      # Creates Parser instance.
      #
      # @param [Site] site
      #
      # @return [Base]

      def initialize(site)
        @assigned_site = site
        @settings      = S[site.name][:parser_settings] || {}
      end

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, NilClass]
      #   TorrentList instance with torrents.
      #   When parser crashed due to unknown reason it returns +nil+.

      def parse(page)
        raise NotImplementedError, "Abstract method called!"
      end
    end # class Base
  end # module Parsers
end # module TTWatcher
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class Megashara < SimpleParser
      private

      # Builds (once, memoized in @links) the list of additional result
      # pages. The next-to-last cell of the pagination table carries the
      # highest page number and a template link ending in a page number.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        pagination_cell = structure.css('table[@class="pagination-table"]')
                                   .xpath('tr')
                                   .xpath('td')[-2]
        return @links = [] if pagination_cell.nil?

        last_page = pagination_cell.css('a').text.to_i - 1
        template  = pagination_cell.css('a').attr('href').to_s

        @links = (1..last_page).map do |page_number|
          template.gsub(/(\d+)$/, page_number.to_s)
        end
      end

      # One <tr> per torrent inside the wide results table.
      def torrents_unparsed # no-doc
        structure.css('table[@class="table-wide"]').css('table').css('tr')
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +megashara+ gives next information about single torrent
      #
      # ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      # -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      # ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      # ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      # -- hsh[:author]       ==> ex. 'Bit kitty fun'
      # -- hsh[:added_date]   ==> ex. '2016-06-15'
      # ++ hsh[:seeders]      ==> ex. 50042
      # ++ hsh[:leeches]      ==> ex. 1
      # ++ hsh[:size]         ==> ex. "20000 mb"
      # ++ hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      # -- hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        cells   = unparsed_data.css('td')
        anchors = cells.css('a')

        torrent_fields = {
          name:       cells[1].text,
          magnet_url: anchors[1].attr('href').to_s,
          url:        anchors.attr('href').to_s,
          size:       cells[3].text,
          seeders:    cells[4].text.to_i,
          leeches:    cells[5].text.to_i,
          tracker:    assigned_site.name
        }

        Torrent.new torrent_fields
      end
    end # class Megashara
  end # module Parsers
end # module TTWatcher
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class Rutor < SimpleParser
      private

      # Builds (once, memoized in @links) the list of additional result
      # pages: every <a> inside the first <b> element of the index block.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        @links = rutor_structure.xpath('b').first.xpath('a').map do |node|
          node.attribute('href').to_s
        end
      end

      # Result rows are striped with two alternating css classes.
      def torrents_unparsed # no-doc
        rutor_structure.css('tr[@class="gai"], tr[@class="tum"]')
      end

      # Root node of the search-results markup.
      def rutor_structure # no-doc
        structure.xpath '//div[@id="index"]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +rutor+ gives next information about single torrent
      #
      # ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      # -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      # ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      # ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      # -- hsh[:author]       ==> ex. 'Bit kitty fun'
      # ++ hsh[:added_date]   ==> ex. '2016-06-15'
      # ++ hsh[:seeders]      ==> ex. 50042
      # ++ hsh[:leeches]      ==> ex. 1
      # ++ hsh[:size]         ==> ex. "20000 mb"
      # ++ hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      # ++ hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        hsh = Hash.new

        hsh[:short_link] = unparsed_data.css('a[@class="downgif"]').attribute('href').to_s
        hsh[:magnet_url] = unparsed_data.css('a')[1].attribute('href').to_s
        hsh[:url]        = unparsed_data.css('a')[2].attribute('href').to_s
        hsh[:name]       = unparsed_data.css('a')[2].text
        hsh[:added_date] = unparsed_data.css('td')[0].text
        # Fixed: +to_i+ was missing here, leaving seeders/leeches as Strings
        # while the sibling parsers (Megashara, Unionpeer) return Integers
        # and the doc above shows integer examples.
        hsh[:seeders]    = unparsed_data.css('td[@align="center"]').css('span')[0].text.to_i
        hsh[:leeches]    = unparsed_data.css('td[@align="center"]').css('span')[1].text.to_i

        if (tmp_size = unparsed_data.css('td[@align="right"]')[1])
          hsh[:size] = tmp_size.text
        end

        hsh[:tracker]      = assigned_site.name
        hsh[:download_url] = assigned_site.address(hsh[:short_link])
        hsh[:url]          = assigned_site.address(hsh[:url])

        Torrent.new hsh
      end
    end # class Rutor
  end # module Parsers
end # module TTWatcher
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class SimpleParser < Parsers::Base # no-doc

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, nil]
      #   When any exception raised it returns nil
      #   Otherwise it returns TorrentList instance.

      def parse(page)
        return nil if page.nil?
        self.page = page
        torrents = TorrentList.new
        loop do
          torrents << extract_torrents_from_page
          next_page? ? continue_with_next_page : break
        end

        return torrents
      rescue StandardError => exception
        # Fixed: the original rescued +Exception+, which also swallowed
        # SignalException, SystemExit and NoMemoryError; StandardError is
        # the correct net for "parser crashed for an unknown reason".
        notificate_about_crash! exception

        return nil
      end

      private

      # @return Array<String>
      #   List of urls that should be scanned before complete torrents search.

      def new_pages_list
        raise NotImplementedError, "Abstract method called!"
      end

      # @return Array<Nokogiri::Node>
      #   Each element from this array represents all available information
      #   about 1 torrent.

      def torrents_unparsed
        raise NotImplementedError, "Abstract method called!"
      end

      ##
      # Extracts single torrent from +unparsed_data+.
      #
      # @param [Nokogiri::Node] unparsed_data
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        raise NotImplementedError, "Abstract method called!"
      end

      # @return [Encoding] (Encoding::UTF_8)
      #   Parser encoding preferences.

      attr_reader :encoding

      # @param [Encoding, String] new_encoding
      #   Strings are resolved via Encoding.find, which raises ArgumentError
      #   for unknown encoding names.

      def encoding=(new_encoding)
        @encoding =
          if new_encoding.is_a?(Encoding)
            new_encoding
          else
            Encoding.find new_encoding
          end
      end

      # @return [String]
      #   Current page.

      attr_reader :page

      # Stores +other_page+, forcing it into the parser +encoding+ when the
      # two encodings differ.

      def page=(other_page)
        @page =
          if other_page.encoding.name == encoding.name
            other_page
          else
            other_page.force_encoding encoding
          end
      end

      ##
      # Structure for current +page+.
      #
      # @return [Nokogiri::HTML::Document]

      def structure
        Nokogiri::HTML page, nil, encoding.to_s
      end

      # @return [TorrentList]
      #   Returns extracted torrents from current +page+.

      def extract_torrents_from_page
        list = TorrentList.new
        torrents_unparsed.each do |unparsed_torrent|
          torrent = extract_torrent unparsed_torrent
          list << torrent
        end

        return list
      end

      # @return [TrueClass, FalseClass]
      #   Returns +true+ if some pages wasn't scanned, otherwise returns +false+.

      def next_page?
        new_pages_list.count > 0
      end

      # Pops the next pending url off +new_pages_list+ and loads it as the
      # current +page+.

      def continue_with_next_page # no-doc
        url = new_pages_list.pop
        self.page = assigned_site.download_page url
      end

      def initialize(site) # no-doc
        super

        # +||+ replaces the redundant ternary; falls back to UTF-8 when the
        # site config does not pin an encoding.
        self.encoding = settings[:encoding] || Encoding::UTF_8
      end

      # Logs the crash with a full backtrace; called only from #parse.

      def notificate_about_crash!(exception)
        Logger.with_backtrace "Parser #{self.class} crashed with error:\n #{exception.inspect}"
      end
    end # class SimpleParser
  end # module Parsers
end # module TTWatcher
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
# frozen_string_literal: true
module TTWatcher
  module Parsers
    class Unionpeer < SimpleParser
      private

      # Builds (once, memoized in @links) the list of additional result
      # pages from the pagination links.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        hrefs = structure.css('p[@class="small"]').css('a').map do |anchor|
          anchor.attr('href')
        end

        # slice!(1..-2) keeps only the interior links (drops the first and
        # last entries); it returns nil when there are too few links, hence
        # the [] fallback.
        @links = hrefs.slice!(1..-2) || []
      end

      # One striped <tr> per torrent in the results table.
      def torrents_unparsed # no-doc
        structure.css 'tr[class="tCenter hl-tr "]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +unionpeer+ gives next information about single torrent
      #
      # ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      # -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      # ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      # ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      # ++ hsh[:author]       ==> ex. 'Bit kitty fun'
      # ++ hsh[:added_date]   ==> ex. '2016-06-15'
      # ++ hsh[:seeders]      ==> ex. 50042
      # ++ hsh[:leeches]      ==> ex. 1
      # ++ hsh[:size]         ==> ex. "20000 mb"
      # -- hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      # ++ hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        title_link    = unparsed_data.css('a[@class="genmed2 tLink"]')
        download_link = unparsed_data.css('a[@class="small tr-dl"]')

        fields = {
          name:         title_link.text,
          author:       unparsed_data.css('td[@class=row1]')[2].text,
          size:         download_link.text,
          added_date:   unparsed_data.css('td[@class="row4 small nowrap"]').css('p')[1].text,
          seeders:      unparsed_data.css('td[@class="row4 seedmed bold"]').text.to_i,
          leeches:      unparsed_data.css('td[@class="row4 leechmed"]').text.to_i,
          url:          assigned_site.address(title_link.attr('href').to_s),
          download_url: assigned_site.address(download_link.attr('href').to_s),
          tracker:      assigned_site.name
        }

        return Torrent.new(fields)
      end
    end # class Unionpeer
  end # module Parsers
end # module TTWatcher
|