btscraper 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/btscraper/udpscrape.rb +171 -0
- data/lib/btscraper.rb +14 -0
- metadata +97 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ff105dcbe1db93e7f9325458093dfe18ed7d25e2a7751b32de2364641d2f441a
|
|
4
|
+
data.tar.gz: d4ac04e3d787e510f56ae4c6dc8a68674aae1bcfb6424850fe3e4933eb96915d
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 98f53d04e61d803b1216bf770839490e6088db99d1b9bcaae5787e5ff09ba4aebb421b2243ce9db904224627ce4a72b1332401db758d2668591d84ef5f28d34b
|
|
7
|
+
data.tar.gz: 865ecfae585fd5f04c38bc2e9c2bdd86d83322e4a89c1eac62b636311d58c60b0918fe036b5745f161e4dd6df295a73a5439936edf60cb97ba1fd4fba2763ef6
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
require 'uri'
|
|
2
|
+
require 'socket'
|
|
3
|
+
require 'timeout'
|
|
4
|
+
|
|
5
|
+
module BTScraper
|
|
6
|
+
|
|
7
|
+
Connection_id = 0x41727101980 # Magic constant sent in the connect request
Actionconn = 0 # Action code of a connect request/response
Actionscrape = 2 # Action code of a scrape request
Actionerr = 3 # Action code of an error response
Defaulttimeout = 15 # Base receive timeout in seconds
Retries = 8 # Maximum number of retransmissions
# Matches a SHA1 infohash written as exactly 40 lowercase hex digits.
# \A and \z anchor the whole string; ^ and $ only anchor a single line, so a
# multi-line string containing one valid line would otherwise be accepted.
Sha1_regex = /\A[0-9a-f]{40}\z/
|
|
14
|
+
|
|
15
|
+
# @author sherkix
|
|
16
|
+
# @see https://bittorrent.org/beps/bep_0015.html BEP 15
|
|
17
|
+
# This class permits you to scrape a UDP torrent tracker according to BEP 15
|
|
18
|
+
class UDPScrape
|
|
19
|
+
|
|
20
|
+
attr_reader :tracker, :info_hash, :hostname, :port
|
|
21
|
+
# @!attribute [r] tracker
|
|
22
|
+
# @return [String] returns tracker full url
|
|
23
|
+
#
|
|
24
|
+
# @!attribute [r] info_hash
|
|
25
|
+
# @return [Array<String>] returns array of infohashes
|
|
26
|
+
#
|
|
27
|
+
# @!attribute [r] hostname
|
|
28
|
+
# @return [String] returns tracker's hostname
|
|
29
|
+
#
|
|
30
|
+
# @!attribute [r] port
|
|
31
|
+
# @return [Integer] returns tracker's port
|
|
32
|
+
#
|
|
33
|
+
# Create a new UDPScrape object
#
# @param tracker [String] Bittorrent tracker server
# @param info_hash [Array<String>, String] Array of infohashes or single infohash
#
# @raise [TypeError] if wrong type of argument is provided
# @raise [BTScraperError] if more than 74 infohashes are provided
# @raise [BTScraperError] if an infohash is rejected by check_info_hash
#
# @example Default usage
#   scrape_object = BTScraper::UDPScrape.new('udp://example.com:3000/announce', ['c22b5f9178342609428d6f51b2c5af4c0bde6a42', 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d'])
#   scrape_object.scrape
def initialize(tracker, info_hash)
  raise TypeError, "String expected, got #{tracker.class}" unless tracker.instance_of?(String)
  unless info_hash.instance_of?(String) || info_hash.instance_of?(Array)
    raise TypeError, "String or Array expected, got #{info_hash.class}"
  end

  hashes = Array(info_hash)
  # Maximum number of infohashes is 74 according to BEP 15
  raise BTScraperError, 'The number of infohashes must not exceed 74' if hashes.size > 74

  # Build lowercase copies instead of downcasing in place: the caller's
  # strings stay untouched and frozen strings no longer raise FrozenError.
  hashes = hashes.map do |hash|
    raise TypeError, "String expected, got #{hash.class}" unless hash.is_a?(String)

    hash.downcase
  end
  check_info_hash hashes

  # Parse the tracker URL once and read both parts from the same object
  uri = URI(tracker)
  @tracker = tracker
  @hostname = uri.hostname
  @port = uri.port
  @info_hash = hashes
end
|
|
70
|
+
# Sends a scrape request for every stored infohash and returns the decoded counters
#
# @example Response example
#   {tracker: "udp://example.com:3000/announce", scraped_data: [{infohash: "c22b5f9178342609428d6f51b2c5af4c0bde6a42", seeders: 20, completed: 1000, leechers: 30}, {infohash: "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d", seeders: 350, completed: 12000, leechers: 23}]}
# @return [Hash] The method returns a hash with the scraped data
# @raise [BTScraperError] If the response is less than 8 bytes
# @raise [BTScraperError] If the tracker answers with an error or with an action other than scrape
# @raise [BTScraperError] If the tracker responds with a transaction_id different from the one sent
# @raise [BTScraperError] If the request still times out after the last retransmission
def scrape
  attempt = 0
  transaction_id = rand_transaction_id
  # The connection id is fetched before this method opens its own socket, so
  # a failing handshake cannot leave that socket unclosed.
  request = [get_connection_id, Actionscrape, transaction_id].pack('Q>NN')
  # Each hex infohash is appended as raw bytes; split wraps the string in the
  # one-element array that pack expects.
  @info_hash.each { |hash| request << hash.split.pack('H*') }

  client = connect_to_tracker
  begin
    client.send request, 0
    # The timeout doubles with every retransmission
    payload = Timeout.timeout(Defaulttimeout * 2**attempt) { client.recvfrom(4096).first }
  rescue Timeout::Error
    attempt += 1
    raise BTScraperError, 'Max retries exceeded' if attempt > Retries

    # Reported on stderr so a library user's stdout stays clean
    warn "#{attempt} Request to #{@hostname} timed out, retrying with a #{Defaulttimeout * 2**attempt}s timeout"
    retry
  ensure
    client.close
  end

  raise BTScraperError, 'The response from the tracker is less than 8 bytes' if payload.bytesize < 8

  fields = payload.unpack('N*')
  if fields[0] == Actionerr
    # Everything after the 8-byte header of an error response is the tracker's message
    message = payload.byteslice(8..).to_s.force_encoding(Encoding::UTF_8).scrub
    raise BTScraperError, message.empty? ? 'Scrape request failed' : "Scrape request failed: #{message}"
  end
  raise BTScraperError, 'Invalid transaction id got from tracker' unless fields[1] == transaction_id
  unless fields[0] == Actionscrape
    raise BTScraperError, "The action number received from the tracker was not #{Actionscrape}"
  end

  create_scrape_hash @info_hash, fields, { tracker: @tracker, scraped_data: [] }
end
|
|
109
|
+
|
|
110
|
+
private
|
|
111
|
+
|
|
112
|
+
# Sends a connect request to the tracker and returns the connection_id from its reply
#
# @return [Integer] connection_id to be used in the scrape request
# @raise [BTScraperError] If the response is less than 16 bytes
# @raise [BTScraperError] If the action received is not the connect action
# @raise [BTScraperError] If the tracker responds with a transaction_id different from the one sent
# @raise [BTScraperError] If the request still times out after the last retransmission
def get_connection_id
  attempt = 0
  transaction_id = rand_transaction_id
  request = [Connection_id, Actionconn, transaction_id].pack('Q>NN')

  client = connect_to_tracker
  begin
    client.send request, 0
    # The timeout doubles with every retransmission
    payload = Timeout.timeout(Defaulttimeout * 2**attempt) { client.recvfrom(16).first }
  rescue Timeout::Error
    attempt += 1
    raise BTScraperError, 'Max retries exceeded' if attempt > Retries

    # Reported on stderr so a library user's stdout stays clean
    warn "#{attempt} Request to #{@hostname} timed out, retrying with a #{Defaulttimeout * 2**attempt}s timeout"
    retry
  ensure
    client.close
  end

  # recvfrom(16) never yields more than 16 bytes, so only a short reply can be
  # detected here; unpacking a short reply would silently produce nil fields.
  raise BTScraperError, 'The response from the tracker is less than 16 bytes' if payload.bytesize < 16

  action, received_id, connection_id = payload.unpack('NNQ>')
  unless action == Actionconn
    raise BTScraperError, "The action number received from the tracker was not #{Actionconn}"
  end
  raise BTScraperError, 'Invalid transaction id got from tracker' unless received_id == transaction_id

  connection_id
end
|
|
143
|
+
|
|
144
|
+
# Opens a UDP socket and connects it to @hostname and @port
#
# @return [UDPSocket] the connected socket; the caller is responsible for closing it
def connect_to_tracker
  client = UDPSocket.new
  begin
    client.connect(@hostname, @port)
  rescue StandardError
    # Close the socket before re-raising so a failed connect does not leak it
    client.close
    raise
  end
  client
end
|
|
149
|
+
|
|
150
|
+
# @return [Integer] random integer between 0 and 4294967295 inclusive
def rand_transaction_id
  upper_bound = 4_294_967_295
  rand(0..upper_bound)
end
|
|
153
|
+
|
|
154
|
+
# Verifies that every infohash matches Sha1_regex
#
# @param info_hash [Array<String>] infohashes to verify
# @return [Array<String>] the array that was passed in
# @raise [BTScraperError] on the first infohash that does not match
def check_info_hash(info_hash)
  info_hash.each do |candidate|
    next if candidate.match?(Sha1_regex)

    raise BTScraperError, 'Invalid infohash provided'
  end
end
|
|
157
|
+
|
|
158
|
+
# Appends one entry per infohash to hash[:scraped_data]
#
# The counters are read from response in groups of three, starting at
# index 2: seeders, completed, leechers.
#
# @param info_hash [Array<String>] infohashes, in the order they were requested
# @param response [Array<Integer>] unpacked tracker response
# @param hash [Hash] result hash with a :scraped_data array to fill
# @return [Hash] the same hash that was passed in
def create_scrape_hash(info_hash, response, hash)
  info_hash.each_with_index do |digest, position|
    offset = 2 + position * 3
    seeders, completed, leechers = response.values_at(offset, offset + 1, offset + 2)
    hash[:scraped_data].push({ infohash: digest, seeders: seeders, completed: completed, leechers: leechers })
  end
  hash
end
|
|
170
|
+
end
|
|
171
|
+
end
|
data/lib/btscraper.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
|
|
2
|
+
# Scrape library for bittorrent trackers
|
|
3
|
+
|
|
4
|
+
module BTScraper
  # @!visibility private
  VERSION = '0.1.0'

  # Base class for every exception raised by this library
  class BTScraperError < StandardError; end

  # Load every Ruby file below the btscraper/ directory next to this file,
  # in sorted order
  Dir[File.join(File.dirname(__FILE__), 'btscraper/**/*.rb')].sort.each do |file|
    require file
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: btscraper
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Sherkix
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: uri
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: binascii
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: timeout
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '0'
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: minitest
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '0'
|
|
68
|
+
description: btscraper is a simple ruby library that allows to retrieve the state
|
|
69
|
+
of a torrent from a tracker
|
|
70
|
+
executables: []
|
|
71
|
+
extensions: []
|
|
72
|
+
extra_rdoc_files: []
|
|
73
|
+
files:
|
|
74
|
+
- lib/btscraper.rb
|
|
75
|
+
- lib/btscraper/udpscrape.rb
|
|
76
|
+
homepage: https://github.com/sherkix/btscraper
|
|
77
|
+
licenses:
|
|
78
|
+
- MIT
|
|
79
|
+
metadata: {}
|
|
80
|
+
rdoc_options: []
|
|
81
|
+
require_paths:
|
|
82
|
+
- lib
|
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
84
|
+
requirements:
|
|
85
|
+
- - ">="
|
|
86
|
+
- !ruby/object:Gem::Version
|
|
87
|
+
version: 2.7.8
|
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
|
+
requirements:
|
|
90
|
+
- - ">="
|
|
91
|
+
- !ruby/object:Gem::Version
|
|
92
|
+
version: '0'
|
|
93
|
+
requirements: []
|
|
94
|
+
rubygems_version: 3.7.2
|
|
95
|
+
specification_version: 4
|
|
96
|
+
summary: Scrape library for bittorrent trackers
|
|
97
|
+
test_files: []
|