btscraper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ff105dcbe1db93e7f9325458093dfe18ed7d25e2a7751b32de2364641d2f441a
4
+ data.tar.gz: d4ac04e3d787e510f56ae4c6dc8a68674aae1bcfb6424850fe3e4933eb96915d
5
+ SHA512:
6
+ metadata.gz: 98f53d04e61d803b1216bf770839490e6088db99d1b9bcaae5787e5ff09ba4aebb421b2243ce9db904224627ce4a72b1332401db758d2668591d84ef5f28d34b
7
+ data.tar.gz: 865ecfae585fd5f04c38bc2e9c2bdd86d83322e4a89c1eac62b636311d58c60b0918fe036b5745f161e4dd6df295a73a5439936edf60cb97ba1fd4fba2763ef6
@@ -0,0 +1,171 @@
1
+ require 'uri'
2
+ require 'socket'
3
+ require 'timeout'
4
+
5
+ module BTScraper
6
+
7
+ Connection_id = 0x41727101980 # Magic Constant
8
+ Actionconn = 0 # Connection Request
9
+ Actionscrape = 2 # Scrape Request
10
+ Actionerr = 3 # Scrape Error
11
+ Defaulttimeout = 15 # Default timeout is 15s
12
+ Retries = 8 # Maximum number of retransmission
13
+ Sha1_regex = /^[0-9a-f]{40}$/ # Regex to check SHA1 infohashes
14
+
15
+ # @author sherkix
16
+ # @see https://bittorrent.org/beps/bep_0015.html BEP 15
17
+ # This class permits you to scrape a UDP torrent tracker according to BEP 15
18
+ class UDPScrape
19
+
20
+ attr_reader :tracker, :info_hash, :hostname, :port
21
+ # @!attribute [r] tracker
22
+ # @return [String] returns tracker full url
23
+ #
24
+ # @!attribute [r] info_hash
25
+ # @return [Array<String>] returns array of infohashes
26
+ #
27
+ # @!attribute [r] hostname
28
+ # @return [String] returns tracker's hostname
29
+ #
30
+ # @!attribute [r] port
31
+ # @return [Integer] returns tracker's port
32
+ #
33
+ # Create a new UDPScrape object
34
+ #
35
+ # @param tracker [String] Bittorrent tracker server
36
+ # @param info_hash [Array<String>, String] Array of infohashes or single infohash
37
+ #
38
+ #
39
+ # @raise [TypeError] if wrong type of argument is provided
40
+ # @raise [BTScraperError] if more than 74 infohashes are provided or if any infohash does not match the SHA1 infohash format
41
+ #
42
+ # @example Default usage
43
+ # scrape_object = BTScraper::UDPScrape.new('udp://example.com:3000/announce', ['c22b5f9178342609428d6f51b2c5af4c0bde6a42', 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d'])
44
+ # scrape_object.scrape
45
+ def initialize(tracker, info_hash)
46
+ unless tracker.instance_of? String
47
+ raise TypeError, "String excpected, got #{tracker.class}"
48
+ end
49
+ unless info_hash.instance_of? String or info_hash.instance_of? Array
50
+ raise TypeError, "String or Array excpected, got #{info_hash.class}"
51
+ end
52
+
53
+ # Maximum number of infohashes is 74 according to BEP 15
54
+ if info_hash.instance_of? Array and info_hash.count > 74
55
+ raise BTScraperError, 'The number of infohashes must be less than 74'
56
+ end
57
+
58
+ if info_hash.instance_of? String
59
+ info_hash.downcase!
60
+ check_info_hash Array(info_hash)
61
+ else
62
+ info_hash.map(&:downcase!)
63
+ check_info_hash info_hash
64
+ end
65
+ @tracker = tracker
66
+ @hostname = URI(@tracker).hostname
67
+ @port = URI(@tracker).port
68
+ @info_hash = Array(info_hash)
69
+ end
70
+ # @example Response example
71
+ # {tracker: "udp://example.com:3000/announce", scraped_data: [{infohash: "c22b5f9178342609428d6f51b2c5af4c0bde6a42", seeders: 20, completed: 1000, leechers: 30}, {infohash: "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d", seeders: 350, completed: 12000, leechers: 23}]}
72
+ # @return [Hash] The method returns a hash with the scraped data
73
+ # @raise [BTScraperError] If the response is less than 8 bytes
74
+ # @raise [BTScraperError] If the scraping request fails
75
+ # @raise [BTScraperError] If the tracker responds with a transaction_id different from the one sent by the client
76
+ # @raise [BTScraperError] If the initial request and all 8 retransmissions time out
77
+ def scrape
78
+ attempt = 0
79
+ client = connect_to_tracker
80
+ transaction_id = rand_transaction_id
81
+ buffer = [get_connection_id, Actionscrape, transaction_id].pack('Q>NN')
82
+ @info_hash.each{|x| buffer << x.split.pack('H*')}
83
+ begin
84
+ client.send buffer, 0
85
+ Timeout::timeout(Defaulttimeout * 2**attempt) do
86
+ response = client.recvfrom(4096)
87
+ if response[0].bytesize < 8
88
+ raise BTScraperError, 'The response from the tracker is less than 8 bytes'
89
+ end
90
+ @unpacked_response = response[0].unpack('N*')
91
+ if @unpacked_response[0] == Actionerr
92
+ raise BTScraperError, 'Scrape request failed'
93
+ end
94
+ unless @unpacked_response[1] == transaction_id
95
+ raise BTScraperError, 'Invalid transaction id got from tracker'
96
+ end
97
+ end
98
+ rescue Timeout::Error
99
+ attempt+=1
100
+ puts "#{attempt} Request to #{@hostname} timed out, retying after #{Defaulttimeout * 2 ** attempt}s"
101
+ retry if attempt <= Retries
102
+ raise BTScraperError, 'Max retries exceeded'
103
+ ensure
104
+ client.close
105
+ end
106
+ hash = {tracker: @tracker, scraped_data:[]}
107
+ create_scrape_hash @info_hash, @unpacked_response, hash
108
+ end
109
+
110
+ private
111
+
112
+ # @return [Integer] the connection_id obtained by sending a connect request to the bittorrent tracker
113
+ def get_connection_id
114
+ attempt = 0
115
+ client = connect_to_tracker
116
+ transaction_id = rand_transaction_id
117
+ buffer = [Connection_id, Actionconn, transaction_id].pack('Q>N*')
118
+ begin
119
+ client.send buffer, 0
120
+ Timeout::timeout(Defaulttimeout * 2**attempt) do
121
+ response = client.recvfrom(16)
122
+ if response[0].bytesize > 16
123
+ raise BTScraperError, 'The response from the tracker is greater than 16 bytes'
124
+ end
125
+ @unpacked_response = response[0].unpack('NNQ>')
126
+ unless @unpacked_response[0] == Actionconn
127
+ raise BTScraperError, "The action number received from the tracker was not #{Actionconn}"
128
+ end
129
+ unless @unpacked_response[1] == transaction_id
130
+ raise BTScraperError, 'Invalid transaction id got from tracker'
131
+ end
132
+ end
133
+ rescue Timeout::Error
134
+ attempt+=1
135
+ puts "#{attempt} Request to #{@hostname} timed out, retrying after #{Defaulttimeout * 2 ** attempt}s"
136
+ retry if attempt <= Retries
137
+ raise BTScraperError, 'Max retries exceeded'
138
+ ensure
139
+ client.close
140
+ end
141
+ @unpacked_response[2]
142
+ end
143
+
144
+ def connect_to_tracker
145
+ client = UDPSocket.new
146
+ client.connect(@hostname, @port)
147
+ client
148
+ end
149
+
150
+ def rand_transaction_id
151
+ rand(0..4294967295)
152
+ end
153
+
154
+ def check_info_hash(info_hash)
155
+ info_hash.each{|x| raise BTScraperError, 'Invalid infohash provided' unless x.match?(Sha1_regex)}
156
+ end
157
+
158
+ def create_scrape_hash(info_hash, response, hash)
159
+ i = 2
160
+ info_hash.each do |x|
161
+ temp_hash = {infohash: x}
162
+ temp_hash[:seeders] = response[i]
163
+ temp_hash[:completed] = response[i+1]
164
+ temp_hash[:leechers] = response[i+2]
165
+ i+=3
166
+ hash[:scraped_data].push temp_hash
167
+ end
168
+ hash
169
+ end
170
+ end
171
+ end
data/lib/btscraper.rb ADDED
@@ -0,0 +1,14 @@
1
+
2
+ # Scrape library for bittorrent trackers
3
+
4
+ module BTScraper
5
+ # @!visibility private
6
+ VERSION = '0.1.0'
7
+
8
+ # Base class for exceptions
9
+ class BTScraperError < StandardError
10
+ end
11
+
12
+ glob = File.join(File.dirname(__FILE__), 'btscraper/**/*.rb')
13
+ Dir[glob].sort.each {|file| require file }
14
+ end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: btscraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Sherkix
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: uri
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: binascii
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: timeout
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: minitest
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ description: btscraper is a simple ruby library that allows to retrieve the state
69
+ of a torrent from a tracker
70
+ executables: []
71
+ extensions: []
72
+ extra_rdoc_files: []
73
+ files:
74
+ - lib/btscraper.rb
75
+ - lib/btscraper/udpscrape.rb
76
+ homepage: https://github.com/sherkix/btscraper
77
+ licenses:
78
+ - MIT
79
+ metadata: {}
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: 2.7.8
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubygems_version: 3.7.2
95
+ specification_version: 4
96
+ summary: Scrape library for bittorrent trackers
97
+ test_files: []