the_mask 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/lib/the_mask.rb +1 -0
- data/lib/the_mask/mechanize_socks_support.rb +38 -0
- data/lib/the_mask/proxy_list.rb +29 -6
- data/lib/the_mask/socket.rb +50 -29
- data/lib/the_mask/version.rb +1 -1
- data/the_mask.gemspec +2 -1
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81556b7f9b9fb2c1bdf439166e30b19c53fefb31
|
4
|
+
data.tar.gz: 8a63e7102e20188ca209d1141524891241b963b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 43edf63a4c770bc64852dd9c7bd832792b7fc3e051ff8d4f70e4a1e908e7aa7f3e532b7df666869de15f9b59135bc159558767120ddadf9e250328a6d08077aa
|
7
|
+
data.tar.gz: 3d4918317db7ff14b716818d16bddf0c73585aa70a1d98fc97af2f91f3858168426427408f4fd0214e3d40e1a17700980479fec13cae84db088b3665919425ba
|
data/Gemfile
CHANGED
data/lib/the_mask.rb
CHANGED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'socksify'
|
2
|
+
require 'socksify/http'
|
3
|
+
|
4
|
+
module MechanizeSOCKSSupport
|
5
|
+
def set_proxy(addr, port, user = nil, pass = nil)
|
6
|
+
@socks = false
|
7
|
+
super
|
8
|
+
end
|
9
|
+
|
10
|
+
def fetch(uri, method = :get, headers = {}, params = [], referer = current_page, redirects = 0)
|
11
|
+
if @socks && !@socks_http.nil?
|
12
|
+
html = @socks_http.get URI(uri)
|
13
|
+
page = Struct.new(:uri, :body)
|
14
|
+
page.new(uri, html)
|
15
|
+
else
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
class Mechanize::HTTP::Agent
|
21
|
+
prepend MechanizeSOCKSSupport
|
22
|
+
attr_accessor :http, :socks
|
23
|
+
|
24
|
+
public
|
25
|
+
def set_socks(addr, port)
|
26
|
+
set_http unless @http
|
27
|
+
@socks_http = Net::HTTP::SOCKSProxy(addr, port)
|
28
|
+
@socks = true
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
def set_http
|
33
|
+
@http = Net::HTTP::Persistent.new 'mechanize'
|
34
|
+
@http.idle_timeout = 5
|
35
|
+
@http.keep_alive = 300
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/the_mask/proxy_list.rb
CHANGED
@@ -1,13 +1,30 @@
|
|
1
1
|
module TheMask
|
2
2
|
class ProxyList
|
3
|
-
#TheMask::ProxyList::Proxy class
|
4
3
|
class Proxy
|
5
|
-
attr_accessor :ip, :port, :username, :password
|
4
|
+
attr_accessor :ip, :port, :username, :password, :type, :socks_version
|
5
|
+
HTTP_PROXY = :http
|
6
|
+
SOCKS_PROXY = :socks
|
7
|
+
SUPPORTED_SOCKS_VERSIONS = ['4', '5']
|
6
8
|
|
7
|
-
def initialize(
|
8
|
-
unless
|
9
|
+
def initialize(input_string = '')
|
10
|
+
unless input_string.empty?
|
9
11
|
# Proxy string format = ip:port[:username:password][:socks] (a trailing "socks" field marks a SOCKS proxy)
|
10
|
-
split_str =
|
12
|
+
split_str = input_string.split(':')
|
13
|
+
last_element = split_str[-1].to_s.downcase
|
14
|
+
|
15
|
+
# Check whether the proxy string ends with the SOCKS marker; otherwise the proxy is treated as an HTTP/HTTPS proxy by default
|
16
|
+
if last_element.eql?(SOCKS_PROXY.to_s) && (split_str.length == 3 || split_str.length == 5)
|
17
|
+
@type = SOCKS_PROXY
|
18
|
+
socks_version = last_element[-1]
|
19
|
+
if SUPPORTED_SOCKS_VERSIONS.include?(socks_version)
|
20
|
+
@socks_version = socks_version.to_i
|
21
|
+
else
|
22
|
+
# Default to the latest supported SOCKS version, in this case version 5
|
23
|
+
@socks_version = SUPPORTED_SOCKS_VERSIONS[-1].to_i
|
24
|
+
end
|
25
|
+
else
|
26
|
+
@type = HTTP_PROXY
|
27
|
+
end
|
11
28
|
@ip = split_str[0].to_s
|
12
29
|
@port = split_str[1].to_i
|
13
30
|
@username = split_str[2].to_s unless split_str[2].nil?
|
@@ -20,8 +37,14 @@ module TheMask
|
|
20
37
|
end
|
21
38
|
end
|
22
39
|
|
40
|
+
def is_SOCKS?
|
41
|
+
@type == SOCKS_PROXY
|
42
|
+
end
|
43
|
+
|
44
|
+
def is_HTTP?
|
45
|
+
@type == HTTP_PROXY
|
46
|
+
end
|
23
47
|
end
|
24
|
-
#ProxyList class
|
25
48
|
attr_accessor :proxy_list
|
26
49
|
|
27
50
|
def initialize(arr = [])
|
data/lib/the_mask/socket.rb
CHANGED
@@ -1,17 +1,22 @@
|
|
1
|
+
require_relative 'mechanize_socks_support'
|
2
|
+
|
1
3
|
module TheMask
|
4
|
+
include MechanizeSOCKSSupport
|
5
|
+
|
2
6
|
class Socket
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
GENERAL_TIMEOUT = 5 #seconds
|
7
|
+
DEFAULT_OPEN_TIMEOUT = 3 # seconds
|
8
|
+
DEFAULT_READ_TIMEOUT = 3 # seconds
|
9
|
+
GENERAL_TIMEOUT = 5 # seconds
|
7
10
|
MAXIMUM_TRIES = 3
|
8
|
-
MINIMUM_PAGE_LENGTH = 100 #bytes
|
11
|
+
MINIMUM_PAGE_LENGTH = 100 # bytes
|
9
12
|
FORCE_READ = false
|
10
13
|
RESET_USER_AGENT = true
|
11
|
-
MIN_PROXY_RESPONSE_TIME = nil #seconds, default: nil = do not remove proxies
|
14
|
+
MIN_PROXY_RESPONSE_TIME = nil # seconds, default: nil = do not remove proxies
|
15
|
+
SOCKS_INCREASE_TIMEOUTS = 0.5 # increase timeout duration by specified magnitude if proxy used for retrieval is SOCKS4/5
|
12
16
|
|
13
17
|
def initialize(options = {})
|
14
18
|
@proxies = nil
|
19
|
+
@socks_increase_timeouts = options[:socks_increase_timeouts] || SOCKS_INCREASE_TIMEOUTS
|
15
20
|
@timeout = options[:timeout] || GENERAL_TIMEOUT
|
16
21
|
@max_tries = options[:max_tries] || MAXIMUM_TRIES
|
17
22
|
@force = options[:force] || FORCE_READ
|
@@ -20,20 +25,15 @@ module TheMask
|
|
20
25
|
@min_proxy_response_time = options[:min_proxy_response_time] || MIN_PROXY_RESPONSE_TIME
|
21
26
|
|
22
27
|
@agent = Mechanize.new
|
28
|
+
@agent.history.max_size = 0
|
23
29
|
|
24
|
-
@
|
25
|
-
@
|
30
|
+
@open_timeout = options[:open_timeout] || DEFAULT_OPEN_TIMEOUT
|
31
|
+
@read_timeout = options[:read_timeout] || DEFAULT_READ_TIMEOUT
|
26
32
|
|
27
|
-
|
28
|
-
if options[:proxy]
|
29
|
-
if options[:proxy][:username] && options[:proxy][:password]
|
30
|
-
@agent.set_proxy options[:proxy][:ip], options[:proxy][:port], options[:proxy][:username], options[:proxy][:password]
|
31
|
-
else
|
32
|
-
@agent.set_proxy options[:proxy][:ip], options[:proxy][:port]
|
33
|
-
end
|
34
|
-
end
|
35
|
-
else
|
33
|
+
if options[:proxies]
|
36
34
|
@proxies = TheMask::ProxyList.new(options[:proxies])
|
35
|
+
else
|
36
|
+
@proxies = TheMask::ProxyList.new([options[:proxy]])
|
37
37
|
end
|
38
38
|
|
39
39
|
@agent.user_agent = TheMask.get_random_user_agent_str unless @reset_user_agent
|
@@ -41,11 +41,12 @@ module TheMask
|
|
41
41
|
|
42
42
|
def open_url(url)
|
43
43
|
read_proc = Proc.new do
|
44
|
-
proxy = nil #Selected proxy
|
45
|
-
tries = 0 #Total URL retrieval tries
|
46
|
-
page_data = nil #Retrieved page html data
|
44
|
+
proxy = nil # Selected proxy
|
45
|
+
tries = 0 # Total URL retrieval tries
|
46
|
+
page_data = nil # Retrieved page html data
|
47
|
+
timeout_adjustments = nil # Adjustments to timeouts based on proxy type
|
47
48
|
|
48
|
-
#Variables for timing the GET request
|
49
|
+
# Variables for timing the GET request
|
49
50
|
end_time = nil
|
50
51
|
start_time = nil
|
51
52
|
|
@@ -63,30 +64,42 @@ module TheMask
|
|
63
64
|
begin
|
64
65
|
proxy = @proxies.get_proxy
|
65
66
|
|
66
|
-
if proxy.
|
67
|
-
@agent.
|
67
|
+
if proxy.is_SOCKS?
|
68
|
+
@agent.agent.set_socks proxy.ip, proxy.port
|
69
|
+
timeout_adjustments = calculate_timeouts @socks_increase_timeouts
|
70
|
+
elsif proxy.is_HTTP?
|
71
|
+
if proxy.username && proxy.password
|
72
|
+
@agent.set_proxy proxy.ip, proxy.port, proxy.username, proxy.password
|
73
|
+
else
|
74
|
+
@agent.set_proxy proxy.ip, proxy.port
|
75
|
+
end
|
76
|
+
timeout_adjustments = calculate_timeouts
|
68
77
|
else
|
69
|
-
|
78
|
+
raise "TheMask: unknown proxy type '#{proxy.type}'."
|
70
79
|
end
|
71
80
|
end
|
72
81
|
end
|
73
82
|
rescue Timeout::ExitException => e
|
74
|
-
#Exception timeout from mechanize
|
83
|
+
# Timeout exception raised by Mechanize
|
75
84
|
@proxies.remove_proxy!(proxy)
|
76
85
|
retry
|
77
86
|
end
|
78
|
-
|
79
|
-
|
87
|
+
@agent.open_timeout = timeout_adjustments[:open_timeout]
|
88
|
+
@agent.read_timeout = timeout_adjustments[:read_timeout]
|
89
|
+
Timeout::timeout(timeout_adjustments[:timeout]) do
|
80
90
|
start_time = Time.now
|
81
91
|
page_data = @agent.get url
|
82
92
|
end_time = Time.now
|
83
93
|
end
|
94
|
+
|
84
95
|
rescue Errno::ETIMEDOUT => e
|
85
96
|
retry
|
86
97
|
rescue Net::HTTP::Persistent::Error => e
|
87
98
|
retry
|
88
99
|
rescue Timeout::Error => e
|
89
100
|
retry
|
101
|
+
rescue SOCKSError => e
|
102
|
+
retry
|
90
103
|
rescue SignalException => e
|
91
104
|
retry
|
92
105
|
rescue Net::HTTPNotFound => e
|
@@ -104,7 +117,7 @@ module TheMask
|
|
104
117
|
end
|
105
118
|
|
106
119
|
unless @min_proxy_response_time.nil? || start_time.nil? || end_time.nil?
|
107
|
-
#Remove proxy from list if response time is longer than the minimum response time provided in options
|
120
|
+
# Remove proxy from list if response time is longer than the minimum response time provided in options
|
108
121
|
response_time = end_time - start_time
|
109
122
|
@proxies.remove_proxy!(proxy) if response_time > @min_proxy_response_time
|
110
123
|
end
|
@@ -115,7 +128,6 @@ module TheMask
|
|
115
128
|
if @force
|
116
129
|
while true
|
117
130
|
data = read_proc.call
|
118
|
-
|
119
131
|
unless data.nil? || data.body.to_s.empty? || data.body.to_s.length < @min_page_length
|
120
132
|
return data.body
|
121
133
|
end
|
@@ -124,5 +136,14 @@ module TheMask
|
|
124
136
|
|
125
137
|
read_proc.call.body
|
126
138
|
end
|
139
|
+
|
140
|
+
private
|
141
|
+
def calculate_timeouts(increase_magnitude = 0)
|
142
|
+
{
|
143
|
+
timeout: (@timeout + (@timeout * increase_magnitude)),
|
144
|
+
open_timeout: (@open_timeout + (@open_timeout * increase_magnitude)),
|
145
|
+
read_timeout: (@read_timeout + (@read_timeout * increase_magnitude))
|
146
|
+
}
|
147
|
+
end
|
127
148
|
end
|
128
149
|
end
|
data/lib/the_mask/version.rb
CHANGED
data/the_mask.gemspec
CHANGED
@@ -23,5 +23,6 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency 'rake', '~> 10.0'
|
24
24
|
spec.add_development_dependency 'rspec', '~> 0'
|
25
25
|
|
26
|
-
spec.add_dependency 'mechanize', '~> 2.7'
|
26
|
+
spec.add_dependency 'mechanize', '~> 2.7.4'
|
27
|
+
spec.add_dependency 'socksify', '~> 1.7.0'
|
27
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: the_mask
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Saoud Khalifah
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,14 +58,28 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 2.7.4
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 2.7.4
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: socksify
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.7.0
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.7.0
|
69
83
|
description: TheMask provides functionality for the purposes of data mining.
|
70
84
|
email:
|
71
85
|
- saouddk@gmail.com
|
@@ -85,6 +99,7 @@ files:
|
|
85
99
|
- bin/setup
|
86
100
|
- lib/the_mask.rb
|
87
101
|
- lib/the_mask/connect.rb
|
102
|
+
- lib/the_mask/mechanize_socks_support.rb
|
88
103
|
- lib/the_mask/proxy_list.rb
|
89
104
|
- lib/the_mask/socket.rb
|
90
105
|
- lib/the_mask/user_agents.rb
|
@@ -115,3 +130,4 @@ signing_key:
|
|
115
130
|
specification_version: 4
|
116
131
|
summary: Socket obfuscation for purposes of data mining/gathering.
|
117
132
|
test_files: []
|
133
|
+
has_rdoc:
|