ttwatcher 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +20 -0
- data/LICENSE +9 -0
- data/sources/ttwatcher.rb +70 -0
- data/sources/ttwatcher/helpers.rb +24 -0
- data/sources/ttwatcher/logger.rb +40 -0
- data/sources/ttwatcher/project_structure.rb +50 -0
- data/sources/ttwatcher/sites.rb +68 -0
- data/sources/ttwatcher/sites/config.rb +20 -0
- data/sources/ttwatcher/sites/config.yml +29 -0
- data/sources/ttwatcher/sites/connection.rb +82 -0
- data/sources/ttwatcher/sites/connection/scheme.rb +79 -0
- data/sources/ttwatcher/sites/connection/url.rb +66 -0
- data/sources/ttwatcher/sites/megashara.rb +18 -0
- data/sources/ttwatcher/sites/parsers/abstract_parser.rb +39 -0
- data/sources/ttwatcher/sites/parsers/megashara_parser.rb +64 -0
- data/sources/ttwatcher/sites/parsers/rutor_parser.rb +67 -0
- data/sources/ttwatcher/sites/parsers/simple_parser.rb +134 -0
- data/sources/ttwatcher/sites/parsers/unionpeer_parser.rb +62 -0
- data/sources/ttwatcher/sites/parsers/zooqle_parser.rb +80 -0
- data/sources/ttwatcher/sites/rutor.rb +13 -0
- data/sources/ttwatcher/sites/site.rb +92 -0
- data/sources/ttwatcher/sites/torrent_site.rb +70 -0
- data/sources/ttwatcher/sites/unionpeer.rb +18 -0
- data/sources/ttwatcher/sites/zooqle.rb +18 -0
- data/sources/ttwatcher/torrent.rb +87 -0
- data/sources/ttwatcher/torrent_agent.rb +34 -0
- data/sources/ttwatcher/torrent_list.rb +51 -0
- data/spec/sources/ttwatcher/sites_spec.rb +8 -0
- data/spec/sources/ttwatcher/torrent_list_spec.rb +46 -0
- data/spec/spec_helper.rb +20 -0
- metadata +156 -0
@@ -0,0 +1,66 @@
# encoding: utf-8
# frozen_string_literal: true

module TTWatcher
  module Sites
    module InternetConnection
      # TODO REFACTOR
      class Url # no-doc

        ##
        # Creates new Url instance.
        #
        # @param [String] url
        #   An url in any form.
        #
        # @param [Hash] params
        # @option params [Symbol] :force_scheme
        # @option params [Hash] :query_params
        # @option params [String] :encoding
        #
        # @return [InternetConnection::Url]

        def initialize(url, **params)
          self.class.include Scheme

          @url      = url
          @query    = params[:query_params] || {}
          @encoding = params[:encoding]     || 'utf-8'

          set_scheme params[:force_scheme]
          encode_url
        end

        private

        attr_reader :encoding # no-doc
        attr_reader :url      # no-doc

        ##
        # return +query+ with correct encoding
        #
        # Merges the query already present in +url+ with the extra
        # +query_params+ and re-encodes every value into +encoding+.

        def query_normalization
          uri   = Addressable::URI.parse url
          tmp_q = (uri.query_values || {}).merge(@query)

          # Early exit when there is nothing to re-encode: the merged query is
          # empty, or every value already carries the target encoding.
          # FIX: the original compared `v.encode` (a String) with the encoding
          # *name*, which could never match, and used `&&` which made the
          # second clause dead code.
          return tmp_q if tmp_q.empty? ||
                          tmp_q.values.all? { |v| v.encoding.name.casecmp(@encoding).zero? }

          tmp_q.each_key do |k|
            tmp_q[k] = tmp_q[k].encode(encoding, invalid: :replace,
                                                 undef:   :replace)
          end

          return tmp_q
        end

        ##
        # Normalization. String to rfc-standard uri
        #
        # NOTE: mutates the caller's string in place via String#replace.

        def encode_url
          uri = Addressable::URI.parse url
          uri.query_values = query_normalization
          @url.replace uri.normalize.to_s.force_encoding(@encoding)
        end
      end # class Url
    end # module InternetConnection
  end # module Sites
end # module TTWatcher
@@ -0,0 +1,18 @@
# encoding: utf-8
# frozen_string_literal: true

module TTWatcher
  module Sites
    ##
    # Megashara search adapter: forwards the query to the generic
    # TorrentSite machinery with the site-specific query parameter.
    class Megashara < TorrentSite

      # Looks +name+ up on megashara.
      def find_torrent(name) # no-doc
        options = { url: { query_params: { text: name } } }
        super name, options
      end

      private

      # Search url for +name+, relative to the site's domain.
      def search_url(name = nil) # no-doc
        domain_name + format('/search/%s', name)
      end
    end # class Megashara
  end # module Sites
end # module TTWatcher
@@ -0,0 +1,39 @@
# encoding: utf-8
# frozen_string_literal: true

module TTWatcher
  module Parsers
    ##
    # Abstract parent for all site parsers. Holds the site the parser
    # belongs to and the site's parser settings from the S config.
    class Base # no-doc

      # @return [Site]
      attr_reader :assigned_site

      # @return [Hash]
      attr_reader :settings

      ##
      # Creates Parser instance.
      #
      # @param [Site] owner
      #
      # @return [Base]

      def initialize(owner)
        @assigned_site = owner
        @settings      = S[owner.name][:parser_settings] || {}
      end

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, NilClass]
      #   TorrentList instance with torrents.
      #   When parser crashed due to unknown reason it returns +nil+.

      def parse(page)
        raise NotImplementedError, "Abstract method called!"
      end
    end # class Base
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,64 @@
# encoding: utf-8
# frozen_string_literal: true

module TTWatcher
  module Parsers
    class Megashara < SimpleParser
      private

      # Builds (and memoizes in @links) the list of extra result pages.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        pagination_cell = structure.css('table[@class="pagination-table"]')
                                   .xpath('tr')
                                   .xpath('td')[-2]
        return @links = [] if pagination_cell.nil?

        pages_count   = pagination_cell.css('a').text.to_i - 1
        link_template = pagination_cell.css('a').attr('href').to_s

        # Derive each page link by swapping the trailing page number.
        @links = (1..pages_count).map do |page_no|
          link_template.gsub(/(\d+)$/, page_no.to_s)
        end
      end

      # Table rows that each describe a single torrent.
      def torrents_unparsed # no-doc
        structure.css('table[@class="table-wide"]').css('table').css('tr')
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +megashara+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  -- hsh[:author]       ==> ex. 'Bit kitty fun'
      #  -- hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  ++ hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  -- hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        cells = unparsed_data.css('td')

        fields = {
          name:       cells[1].text,
          magnet_url: cells.css('a')[1].attr('href').to_s,
          url:        cells.css('a').attr('href').to_s,
          size:       cells[3].text,
          seeders:    cells[4].text.to_i,
          leeches:    cells[5].text.to_i,
          tracker:    assigned_site.name
        }

        Torrent.new fields
      end
    end # class Megashara
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,67 @@
# encoding: utf-8
# frozen_string_literal: true

module TTWatcher
  module Parsers
    class Rutor < SimpleParser
      private

      # Pagination links from the index block, memoized in @links.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        anchor_nodes = rutor_structure.xpath('b').first.xpath('a')
        @links = anchor_nodes.map { |anchor| anchor.attribute('href').to_s }
      end

      # Rows that each describe a single torrent.
      def torrents_unparsed # no-doc
        rutor_structure.css('tr[@class="gai"], tr[@class="tum"]')
      end

      # The site's main index container node set.
      def rutor_structure # no-doc
        structure.xpath '//div[@id="index"]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +rutor+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  -- hsh[:author]       ==> ex. 'Bit kitty fun'
      #  ++ hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  ++ hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  ++ hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        anchors = unparsed_data.css('a')
        spans   = unparsed_data.css('td[@align="center"]').css('span')

        fields = {
          short_link: unparsed_data.css('a[@class="downgif"]').attribute('href').to_s,
          magnet_url: anchors[1].attribute('href').to_s,
          url:        anchors[2].attribute('href').to_s,
          name:       anchors[2].text,
          added_date: unparsed_data.css('td')[0].text,
          seeders:    spans[0].text,
          leeches:    spans[1].text
        }

        # The size column is absent on some rows, hence the guard.
        size_cell = unparsed_data.css('td[@align="right"]')[1]
        fields[:size] = size_cell.text if size_cell

        fields[:tracker]      = assigned_site.name
        fields[:download_url] = assigned_site.address(fields[:short_link])
        fields[:url]          = assigned_site.address(fields[:url])

        Torrent.new fields
      end
    end # class Rutor
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,134 @@
# encoding: utf-8
# frozen_string_literal: true

module TTWatcher
  module Parsers
    ##
    # Template-method base for page-walking parsers: scans the initial page,
    # then every page reported by #new_pages_list, extracting one Torrent per
    # row found by #torrents_unparsed. Subclasses implement the three
    # abstract hooks below.
    class SimpleParser < Parsers::Base # no-doc

      # @param [String] page
      #   Url to initial page for parsing.
      #
      # @return [TorrentList, nil]
      #   When any exception raised it returns nil
      #   Otherwise it returns TorrentList instance.

      def parse(page)
        return nil if page.nil?
        self.page = page
        torrents = TorrentList.new
        loop do
          torrents << extract_torrents_from_page
          break unless next_page?
          continue_with_next_page
        end

        return torrents
      rescue StandardError => exception
        # FIX: previously rescued Exception, which also swallowed
        # SignalException / SystemExit; StandardError is the correct net.
        notificate_about_crash! exception

        return nil
      end

      private

      # @return Array<String>
      #   List of urls that should be scanned before complete torrents search.

      def new_pages_list
        raise NotImplementedError, "Abstract method called!"
      end

      # @return Array<Nokogiri::Node>
      #   Each element from this array represents all available information
      #   about 1 torrent.

      def torrents_unparsed
        raise NotImplementedError, "Abstract method called!"
      end

      ##
      # Extracts single torrent from +unparsed_data+.
      #
      # @param [Nokogiri::Node] unparsed_data
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        raise NotImplementedError, "Abstract method called!"
      end

      # @return [Encoding] (Encoding::UTF_8)
      #   Parser encoding preferences.

      attr_reader :encoding

      # @param [Encoding, String] new_encoding
      #   Accepts an Encoding object or a name resolvable by Encoding.find.

      def encoding=(new_encoding)
        @encoding =
          if new_encoding.is_a?(Encoding)
            new_encoding
          else
            Encoding.find new_encoding
          end
      end

      # @return [String]
      #   Current page.

      attr_reader :page

      # Stores +other_page+, forcing it into the parser's encoding if needed.
      def page=(other_page)
        @page =
          if other_page.encoding.name == encoding.name
            other_page
          else
            other_page.force_encoding encoding
          end
      end

      ##
      # Structure for current +page+.
      #
      # @return [Nokogiri::HTML::Document]

      def structure
        Nokogiri::HTML page, nil, encoding.to_s
      end

      # @return [TorrentList]
      #   Returns extracted torrents from current +page+.

      def extract_torrents_from_page
        list = TorrentList.new
        torrents_unparsed.each do |unparsed_torrent|
          list << extract_torrent(unparsed_torrent)
        end

        return list
      end

      # @return [TrueClass, FalseClass]
      #   Returns +true+ if some pages wasn't scanned, otherwise returns +false+.

      def next_page?
        new_pages_list.count > 0
      end

      # Pops the next pending url and loads it as the current page.
      def continue_with_next_page # no-doc
        url = new_pages_list.pop
        self.page = assigned_site.download_page url
      end

      def initialize(site) # no-doc
        super

        self.encoding =
          settings[:encoding] ? settings[:encoding] : Encoding::UTF_8
      end

      # Logs the crash with backtrace; the failure itself is swallowed
      # by #parse, which returns nil to the caller.
      def notificate_about_crash!(exception)
        Logger.with_backtrace "Parser #{self.class} crashed with error:\n #{exception.inspect}"
      end
    end # class SimpleParser
  end # module Parsers
end # module TTWatcher
@@ -0,0 +1,62 @@
# encoding: utf-8
# frozen_string_literal: true

module TTWatcher
  module Parsers
    class Unionpeer < SimpleParser
      private

      # Pagination links, memoized in @links.
      def new_pages_list # no-doc
        return @links if @links.is_a? Array

        hrefs = structure.css('p[@class="small"]').css('a').map { |a_node| a_node.attr('href') }
        # slice!(1..-2) keeps only the inner entries, dropping the first and
        # last anchors (presumably prev/next controls — verify against the
        # live markup); it returns nil for out-of-range, hence `|| []`.
        @links = hrefs.slice!(1..-2) || []
      end

      # Rows that each describe a single torrent (note the trailing space
      # in the class attribute — it is present in the site's markup).
      def torrents_unparsed # no-doc
        structure.css 'tr[class="tCenter hl-tr "]'
      end

      # @param [Nokogiri::Node] unparsed_data
      #
      # Surface scan for +unionpeer+ gives next information about single torrent
      #
      #  ++ hsh[:name]         ==> ex. "Cats swimming in pool 2016 BDRIP"
      #  -- hsh[:description]  ==> ex. "Hot CATS. Summer 2016"
      #  ++ hsh[:url]          ==> ex. "example.torrent.side/12345"
      #  ++ hsh[:tracker]      ==> ex. :super_cool_tracker
      #  ++ hsh[:author]       ==> ex. 'Bit kitty fun'
      #  ++ hsh[:added_date]   ==> ex. '2016-06-15'
      #  ++ hsh[:seeders]      ==> ex. 50042
      #  ++ hsh[:leeches]      ==> ex. 1
      #  ++ hsh[:size]         ==> ex. "20000 mb"
      #  -- hsh[:magnet_url]   ==> ex. "magnet:?xt=urn....................."
      #  ++ hsh[:download_url] ==> ex. "example.torrent.side/12345/download"
      #
      # Where '++' means that field is present.
      #
      # @return [Torrent]

      def extract_torrent(unparsed_data)
        fields = {
          name:       unparsed_data.css('a[@class="genmed2 tLink"]').text,
          author:     unparsed_data.css('td[@class=row1]')[2].text,
          size:       unparsed_data.css('a[@class="small tr-dl"]').text,
          added_date: unparsed_data.css('td[@class="row4 small nowrap"]').css('p')[1].text,
          seeders:    unparsed_data.css('td[@class="row4 seedmed bold"]').text.to_i,
          leeches:    unparsed_data.css('td[@class="row4 leechmed"]').text.to_i
        }

        topic_href = unparsed_data.css('a[@class="genmed2 tLink"]').attr('href').to_s
        fields[:url] = assigned_site.address(topic_href)

        download_href = unparsed_data.css('a[@class="small tr-dl"]').attr('href').to_s
        fields[:download_url] = assigned_site.address(download_href)

        fields[:tracker] = assigned_site.name

        Torrent.new(fields)
      end
    end # class Unionpeer
  end # module Parsers
end # module TTWatcher