btscraper 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/btscraper/checkinfohash.rb +10 -0
- data/lib/btscraper/httpscrape.rb +60 -0
- data/lib/btscraper/udpscrape.rb +9 -14
- data/lib/btscraper.rb +2 -1
- metadata +32 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 213c402de8c6be9cb33d77c557fbdef58e5737ef08e0f06e6b4fca2676c79442
|
|
4
|
+
data.tar.gz: '0810754bca603271dcf2d57c016272ce8e13d6a0c136844c63449b04e3b0d64f'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 37d768fc93959230ada23bf5d445ab573de61479e3a03661193063306842f14c6597c0551a10266d4275d342332ab823e0c0ebdce2ad69b93aab70d21d38b6e5
|
|
7
|
+
data.tar.gz: 94d4f3064ea64239ac7154da5533dd54cadc0b04f3ea3429f0d19aef6f7cdfd9476576e7621fb208ba3d108a2719d64112236083073fbb07f81f066f9b2c1656
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
module BTScraper
  # Regex to check SHA1 infohashes: exactly 40 lowercase hexadecimal digits.
  # It is anchored with \A and \z instead of ^ and $ because ^/$ match at every
  # line boundary, which would accept strings such as "<40 hex digits>\njunk".
  Sha1_regex = /\A[0-9a-f]{40}\z/

  # This method checks if the infohashes are valid
  #
  # @param info_hash [Array<String>] infohashes to validate; callers in this
  #   library downcase them before calling
  # @return [Array<String>] the collection that was passed in
  # @raise [BTScraperError] If the infohash is invalid
  def self.check_info_hash(info_hash)
    info_hash.each do |hash|
      raise BTScraperError, 'Invalid infohash provided' unless hash.match?(Sha1_regex)
    end
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
require 'bencode'
|
|
2
|
+
require 'httparty'
|
|
3
|
+
require 'binascii'
|
|
4
|
+
|
|
5
|
+
module BTScraper

  # @author sherkix
  # @see https://bittorrent.org/beps/bep_0048.html BEP 48
  # This class permits you to scrape an HTTP torrent tracker according to BEP 48
  class HTTPScrape

    attr_reader :tracker, :info_hash
    # @!attribute [r] tracker
    #   @return [String] returns tracker full url
    #
    # @!attribute [r] info_hash
    #   @return [Array<String>] returns array of (lowercased) infohashes

    # Create a new HTTPScrape object
    #
    # The infohashes are lowercased into new strings before validation, so the
    # objects passed by the caller are never modified and frozen strings work.
    #
    # @param tracker [String] Bittorrent HTTP tracker server
    # @param info_hash [Array<String>, String] Array of infohashes or single infohash
    #
    # @raise [TypeError] if wrong type of argument is provided
    # @raise [BTScraperError] if BTScraper.check_info_hash rejects an infohash
    #
    # @example Default usage
    #   scrape_object = BTScraper::HTTPScrape.new('https://example.com:443/scrape', ['c22b5f9178342609428d6f51b2c5af4c0bde6a42', 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d'])
    #   scrape_object.scrape
    def initialize(tracker, info_hash)
      raise TypeError, "String expected, got #{tracker.class}" unless tracker.instance_of?(String)
      unless info_hash.instance_of?(String) || info_hash.instance_of?(Array)
        raise TypeError, "String or Array expected, got #{info_hash.class}"
      end

      # Non-destructive downcase: a single String is wrapped into a one-element Array first
      hashes = Array(info_hash).map(&:downcase)
      BTScraper.check_info_hash(hashes)
      @tracker = tracker
      @info_hash = hashes
    end

    # Sends one GET request to the tracker with every infohash as an
    # `info_hash` query parameter and bdecodes the response body.
    #
    # @example Response example
    #   {"files" => {"xxxxxxxxxxxxxxxxxxxxxxxxxx" => {"complete" => 8, "downloaded" => 9, "incomplete" => 4}, "yyyyyyyyyyyyyyyyyyyyyyyyyy" => {"complete" => 81, "downloaded" => 204, "incomplete" => 23}, "zzzzzzzzzzzzzzzzzzzzzzzzzz" => {"complete" => 3, "downloaded" => 26, "incomplete" => 1}}}
    # @return [Hash] The method returns a hash with the scraped data
    # @raise [BTScraperError] If HTTParty raises an HTTParty::Error
    def scrape
      # Each hex infohash is converted back to its raw bytes and percent-encoded.
      # NOTE(review): CGI is not required by this file; presumably it is loaded
      # by one of the dependencies - confirm.
      query = @info_hash.map { |hex| "info_hash=#{CGI.escape(Binascii.a2b_hex(hex).to_s)}" }.join('&')
      headers = { 'User-Agent' => "btscraper #{VERSION}" }
      HTTParty.get(@tracker, query: query, headers: headers, timeout: 10).body.bdecode
    rescue HTTParty::Error => e
      raise BTScraperError, e
    end
  end
end
|
data/lib/btscraper/udpscrape.rb
CHANGED
|
@@ -10,11 +10,10 @@ module BTScraper
|
|
|
10
10
|
Actionerr = 3 # Scrape Error
|
|
11
11
|
Defaulttimeout = 15 # Default timeout is 15s
|
|
12
12
|
Retries = 8 # Maximum number of retransmission
|
|
13
|
-
Sha1_regex = /^[0-9a-f]{40}$/ # Regex to check SHA1 infohashes
|
|
14
13
|
|
|
15
14
|
# @author sherkix
|
|
16
15
|
# @see https://bittorrent.org/beps/bep_0015.html BEP 15
|
|
17
|
-
# This class permits you to scrape an UDP torrent tracker according to BEP 15
|
|
16
|
+
# This class permits you to scrape an UDP torrent tracker according to the BEP 15
|
|
18
17
|
class UDPScrape
|
|
19
18
|
|
|
20
19
|
attr_reader :tracker, :info_hash, :hostname, :port
|
|
@@ -32,7 +31,7 @@ module BTScraper
|
|
|
32
31
|
#
|
|
33
32
|
# Create a new UDPScrape object
|
|
34
33
|
#
|
|
35
|
-
# @param tracker [String] Bittorrent tracker server
|
|
34
|
+
# @param tracker [String] Bittorrent UDP tracker server
|
|
36
35
|
# @param info_hash [Array<String>, String] Array of infohashes or single infohash
|
|
37
36
|
#
|
|
38
37
|
#
|
|
@@ -50,17 +49,17 @@ module BTScraper
|
|
|
50
49
|
raise TypeError, "String or Array excpected, got #{info_hash.class}"
|
|
51
50
|
end
|
|
52
51
|
|
|
53
|
-
# Maximum number of infohashes is 74
|
|
52
|
+
# Maximum number of infohashes is 74
|
|
54
53
|
if info_hash.instance_of? Array and info_hash.count > 74
|
|
55
54
|
raise BTScraperError, 'The number of infohashes must be less than 74'
|
|
56
55
|
end
|
|
57
56
|
|
|
58
57
|
if info_hash.instance_of? String
|
|
59
58
|
info_hash.downcase!
|
|
60
|
-
check_info_hash Array(info_hash)
|
|
59
|
+
BTScraper.check_info_hash Array(info_hash)
|
|
61
60
|
else
|
|
62
61
|
info_hash.map(&:downcase!)
|
|
63
|
-
check_info_hash info_hash
|
|
62
|
+
BTScraper.check_info_hash info_hash
|
|
64
63
|
end
|
|
65
64
|
@tracker = tracker
|
|
66
65
|
@hostname = URI(@tracker).hostname
|
|
@@ -72,7 +71,7 @@ module BTScraper
|
|
|
72
71
|
# @return [Hash] The method returns a hash with the scraped data
|
|
73
72
|
# @raise [BTScraperError] If the response is less than 8 bytes
|
|
74
73
|
# @raise [BTScraperError] If the scraping request fails
|
|
75
|
-
# @raise [BTScraperError] If the tracker
|
|
74
|
+
# @raise [BTScraperError] If the tracker responds with a transaction_id different from the one provided by the client
|
|
76
75
|
# @raise [BTScraperError] After 8 timeouts
|
|
77
76
|
def scrape
|
|
78
77
|
attempt = 0
|
|
@@ -97,7 +96,7 @@ module BTScraper
|
|
|
97
96
|
end
|
|
98
97
|
rescue Timeout::Error
|
|
99
98
|
attempt+=1
|
|
100
|
-
puts "#{attempt} Request to #{@hostname} timed out, retying after #{Defaulttimeout * 2
|
|
99
|
+
puts "#{attempt} Request to #{@hostname} timed out, retying after #{Defaulttimeout * 2**attempt}s"
|
|
101
100
|
retry if attempt <= Retries
|
|
102
101
|
raise BTScraperError, 'Max retries exceeded'
|
|
103
102
|
ensure
|
|
@@ -109,7 +108,7 @@ module BTScraper
|
|
|
109
108
|
|
|
110
109
|
private
|
|
111
110
|
|
|
112
|
-
# @return [Array<Integer>] This method makes request to the bittorrent tracker to get a connection_id
|
|
111
|
+
# @return [Array<Integer>] This method makes a request to the bittorrent tracker to get a connection_id
|
|
113
112
|
def get_connection_id
|
|
114
113
|
attempt = 0
|
|
115
114
|
client = connect_to_tracker
|
|
@@ -132,7 +131,7 @@ module BTScraper
|
|
|
132
131
|
end
|
|
133
132
|
rescue Timeout::Error
|
|
134
133
|
attempt+=1
|
|
135
|
-
puts "#{attempt} Request to #{@hostname} timed out, retrying after #{Defaulttimeout * 2
|
|
134
|
+
puts "#{attempt} Request to #{@hostname} timed out, retrying after #{Defaulttimeout * 2**attempt}s"
|
|
136
135
|
retry if attempt <= Retries
|
|
137
136
|
raise BTScraperError, 'Max retries exceeded'
|
|
138
137
|
ensure
|
|
@@ -151,10 +150,6 @@ module BTScraper
|
|
|
151
150
|
rand(0..4294967295)
|
|
152
151
|
end
|
|
153
152
|
|
|
154
|
-
def check_info_hash(info_hash)
|
|
155
|
-
info_hash.each{|x| raise BTScraperError, 'Invalid infohash provided' unless x.match?(Sha1_regex)}
|
|
156
|
-
end
|
|
157
|
-
|
|
158
153
|
def create_scrape_hash(info_hash, response, hash)
|
|
159
154
|
i = 2
|
|
160
155
|
info_hash.each do |x|
|
data/lib/btscraper.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: btscraper
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sherkix
|
|
@@ -51,6 +51,34 @@ dependencies:
|
|
|
51
51
|
- - ">="
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
53
|
version: '0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: bencode
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '0'
|
|
61
|
+
type: :runtime
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: httparty
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - ">="
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '0'
|
|
75
|
+
type: :runtime
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - ">="
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '0'
|
|
54
82
|
- !ruby/object:Gem::Dependency
|
|
55
83
|
name: minitest
|
|
56
84
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -72,6 +100,8 @@ extensions: []
|
|
|
72
100
|
extra_rdoc_files: []
|
|
73
101
|
files:
|
|
74
102
|
- lib/btscraper.rb
|
|
103
|
+
- lib/btscraper/checkinfohash.rb
|
|
104
|
+
- lib/btscraper/httpscrape.rb
|
|
75
105
|
- lib/btscraper/udpscrape.rb
|
|
76
106
|
homepage: https://github.com/sherkix/btscraper
|
|
77
107
|
licenses:
|
|
@@ -91,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
91
121
|
- !ruby/object:Gem::Version
|
|
92
122
|
version: '0'
|
|
93
123
|
requirements: []
|
|
94
|
-
rubygems_version:
|
|
124
|
+
rubygems_version: 4.0.0
|
|
95
125
|
specification_version: 4
|
|
96
126
|
summary: Scrape library for bittorrent trackers
|
|
97
127
|
test_files: []
|