the_mask 0.2.2 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/lib/the_mask.rb +1 -0
- data/lib/the_mask/mechanize_socks_support.rb +38 -0
- data/lib/the_mask/proxy_list.rb +29 -6
- data/lib/the_mask/socket.rb +50 -29
- data/lib/the_mask/version.rb +1 -1
- data/the_mask.gemspec +2 -1
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81556b7f9b9fb2c1bdf439166e30b19c53fefb31
|
4
|
+
data.tar.gz: 8a63e7102e20188ca209d1141524891241b963b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 43edf63a4c770bc64852dd9c7bd832792b7fc3e051ff8d4f70e4a1e908e7aa7f3e532b7df666869de15f9b59135bc159558767120ddadf9e250328a6d08077aa
|
7
|
+
data.tar.gz: 3d4918317db7ff14b716818d16bddf0c73585aa70a1d98fc97af2f91f3858168426427408f4fd0214e3d40e1a17700980479fec13cae84db088b3665919425ba
|
data/Gemfile
CHANGED
data/lib/the_mask.rb
CHANGED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'socksify'
|
2
|
+
require 'socksify/http'
|
3
|
+
|
4
|
+
module MechanizeSOCKSSupport
|
5
|
+
def set_proxy(addr, port, user = nil, pass = nil)
|
6
|
+
@socks = false
|
7
|
+
super
|
8
|
+
end
|
9
|
+
|
10
|
+
def fetch(uri, method = :get, headers = {}, params = [], referer = current_page, redirects = 0)
|
11
|
+
if @socks && !@socks_http.nil?
|
12
|
+
html = @socks_http.get URI(uri)
|
13
|
+
page = Struct.new(:uri, :body)
|
14
|
+
page.new(uri, html)
|
15
|
+
else
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
class Mechanize::HTTP::Agent
|
21
|
+
prepend MechanizeSOCKSSupport
|
22
|
+
attr_accessor :http, :socks
|
23
|
+
|
24
|
+
public
|
25
|
+
def set_socks(addr, port)
|
26
|
+
set_http unless @http
|
27
|
+
@socks_http = Net::HTTP::SOCKSProxy(addr, port)
|
28
|
+
@socks = true
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
def set_http
|
33
|
+
@http = Net::HTTP::Persistent.new 'mechanize'
|
34
|
+
@http.idle_timeout = 5
|
35
|
+
@http.keep_alive = 300
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/the_mask/proxy_list.rb
CHANGED
@@ -1,13 +1,30 @@
|
|
1
1
|
module TheMask
|
2
2
|
class ProxyList
|
3
|
-
#TheMask::ProxyList::Proxy class
|
4
3
|
class Proxy
|
5
|
-
attr_accessor :ip, :port, :username, :password
|
4
|
+
attr_accessor :ip, :port, :username, :password, :type, :socks_version
|
5
|
+
HTTP_PROXY = :http
|
6
|
+
SOCKS_PROXY = :socks
|
7
|
+
SUPPORTED_SOCKS_VERSIONS = ['4', '5']
|
6
8
|
|
7
|
-
def initialize(
|
8
|
-
unless
|
9
|
+
def initialize(input_string = '')
|
10
|
+
unless input_string.empty?
|
9
11
|
# Proxy string format = ip:port:username:password
|
10
|
-
split_str =
|
12
|
+
split_str = input_string.split(':')
|
13
|
+
last_element = split_str[-1].to_s.downcase
|
14
|
+
|
15
|
+
# Check if the proxy has the SOCKS parameter enabled; by default a proxy is always an HTTP/S proxy
|
16
|
+
if last_element.eql?(SOCKS_PROXY.to_s) && (split_str.length == 3 || split_str.length == 5)
|
17
|
+
@type = SOCKS_PROXY
|
18
|
+
socks_version = last_element[-1]
|
19
|
+
if SUPPORTED_SOCKS_VERSIONS.include?(socks_version)
|
20
|
+
@socks_version = socks_version.to_i
|
21
|
+
else
|
22
|
+
# Default version is the latest SOCKS, in this case ver. 5
|
23
|
+
@socks_version = SUPPORTED_SOCKS_VERSIONS[-1].to_i
|
24
|
+
end
|
25
|
+
else
|
26
|
+
@type = HTTP_PROXY
|
27
|
+
end
|
11
28
|
@ip = split_str[0].to_s
|
12
29
|
@port = split_str[1].to_i
|
13
30
|
@username = split_str[2].to_s unless split_str[2].nil?
|
@@ -20,8 +37,14 @@ module TheMask
|
|
20
37
|
end
|
21
38
|
end
|
22
39
|
|
40
|
+
def is_SOCKS?
|
41
|
+
@type == SOCKS_PROXY
|
42
|
+
end
|
43
|
+
|
44
|
+
def is_HTTP?
|
45
|
+
@type == HTTP_PROXY
|
46
|
+
end
|
23
47
|
end
|
24
|
-
#ProxyList class
|
25
48
|
attr_accessor :proxy_list
|
26
49
|
|
27
50
|
def initialize(arr = [])
|
data/lib/the_mask/socket.rb
CHANGED
@@ -1,17 +1,22 @@
|
|
1
|
+
require_relative 'mechanize_socks_support'
|
2
|
+
|
1
3
|
module TheMask
|
4
|
+
include MechanizeSOCKSSupport
|
5
|
+
|
2
6
|
class Socket
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
GENERAL_TIMEOUT = 5 #seconds
|
7
|
+
DEFAULT_OPEN_TIMEOUT = 3 # seconds
|
8
|
+
DEFAULT_READ_TIMEOUT = 3 # seconds
|
9
|
+
GENERAL_TIMEOUT = 5 # seconds
|
7
10
|
MAXIMUM_TRIES = 3
|
8
|
-
MINIMUM_PAGE_LENGTH = 100 #bytes
|
11
|
+
MINIMUM_PAGE_LENGTH = 100 # bytes
|
9
12
|
FORCE_READ = false
|
10
13
|
RESET_USER_AGENT = true
|
11
|
-
MIN_PROXY_RESPONSE_TIME = nil #seconds, default: nil = do not remove proxies
|
14
|
+
MIN_PROXY_RESPONSE_TIME = nil # seconds, default: nil = do not remove proxies
|
15
|
+
SOCKS_INCREASE_TIMEOUTS = 0.5 # increase timeout duration by specified magnitude if proxy used for retrieval is SOCKS4/5
|
12
16
|
|
13
17
|
def initialize(options = {})
|
14
18
|
@proxies = nil
|
19
|
+
@socks_increase_timeouts = options[:socks_increase_timeouts] || SOCKS_INCREASE_TIMEOUTS
|
15
20
|
@timeout = options[:timeout] || GENERAL_TIMEOUT
|
16
21
|
@max_tries = options[:max_tries] || MAXIMUM_TRIES
|
17
22
|
@force = options[:force] || FORCE_READ
|
@@ -20,20 +25,15 @@ module TheMask
|
|
20
25
|
@min_proxy_response_time = options[:min_proxy_response_time] || MIN_PROXY_RESPONSE_TIME
|
21
26
|
|
22
27
|
@agent = Mechanize.new
|
28
|
+
@agent.history.max_size = 0
|
23
29
|
|
24
|
-
@
|
25
|
-
@
|
30
|
+
@open_timeout = options[:open_timeout] || DEFAULT_OPEN_TIMEOUT
|
31
|
+
@read_timeout = options[:read_timeout] || DEFAULT_READ_TIMEOUT
|
26
32
|
|
27
|
-
|
28
|
-
if options[:proxy]
|
29
|
-
if options[:proxy][:username] && options[:proxy][:password]
|
30
|
-
@agent.set_proxy options[:proxy][:ip], options[:proxy][:port], options[:proxy][:username], options[:proxy][:password]
|
31
|
-
else
|
32
|
-
@agent.set_proxy options[:proxy][:ip], options[:proxy][:port]
|
33
|
-
end
|
34
|
-
end
|
35
|
-
else
|
33
|
+
if options[:proxies]
|
36
34
|
@proxies = TheMask::ProxyList.new(options[:proxies])
|
35
|
+
else
|
36
|
+
@proxies = TheMask::ProxyList.new([options[:proxy]])
|
37
37
|
end
|
38
38
|
|
39
39
|
@agent.user_agent = TheMask.get_random_user_agent_str unless @reset_user_agent
|
@@ -41,11 +41,12 @@ module TheMask
|
|
41
41
|
|
42
42
|
def open_url(url)
|
43
43
|
read_proc = Proc.new do
|
44
|
-
proxy = nil #Selected proxy
|
45
|
-
tries = 0 #Total URL retrieval tries
|
46
|
-
page_data = nil #Retrieved page html data
|
44
|
+
proxy = nil # Selected proxy
|
45
|
+
tries = 0 # Total URL retrieval tries
|
46
|
+
page_data = nil # Retrieved page html data
|
47
|
+
timeout_adjustments = nil # Adjustments to timeouts based on proxy type
|
47
48
|
|
48
|
-
#Variables for timing the GET request
|
49
|
+
# Variables for timing the GET request
|
49
50
|
end_time = nil
|
50
51
|
start_time = nil
|
51
52
|
|
@@ -63,30 +64,42 @@ module TheMask
|
|
63
64
|
begin
|
64
65
|
proxy = @proxies.get_proxy
|
65
66
|
|
66
|
-
if proxy.
|
67
|
-
@agent.
|
67
|
+
if proxy.is_SOCKS?
|
68
|
+
@agent.agent.set_socks proxy.ip, proxy.port
|
69
|
+
timeout_adjustments = calculate_timeouts @socks_increase_timeouts
|
70
|
+
elsif proxy.is_HTTP?
|
71
|
+
if proxy.username && proxy.password
|
72
|
+
@agent.set_proxy proxy.ip, proxy.port, proxy.username, proxy.password
|
73
|
+
else
|
74
|
+
@agent.set_proxy proxy.ip, proxy.port
|
75
|
+
end
|
76
|
+
timeout_adjustments = calculate_timeouts
|
68
77
|
else
|
69
|
-
|
78
|
+
raise "TheMask: unknown proxy type '#{proxy.type}'."
|
70
79
|
end
|
71
80
|
end
|
72
81
|
end
|
73
82
|
rescue Timeout::ExitException => e
|
74
|
-
#Exception timeout from mechanize
|
83
|
+
# Timeout exception from Mechanize
|
75
84
|
@proxies.remove_proxy!(proxy)
|
76
85
|
retry
|
77
86
|
end
|
78
|
-
|
79
|
-
|
87
|
+
@agent.open_timeout = timeout_adjustments[:open_timeout]
|
88
|
+
@agent.read_timeout = timeout_adjustments[:read_timeout]
|
89
|
+
Timeout::timeout(timeout_adjustments[:timeout]) do
|
80
90
|
start_time = Time.now
|
81
91
|
page_data = @agent.get url
|
82
92
|
end_time = Time.now
|
83
93
|
end
|
94
|
+
|
84
95
|
rescue Errno::ETIMEDOUT => e
|
85
96
|
retry
|
86
97
|
rescue Net::HTTP::Persistent::Error => e
|
87
98
|
retry
|
88
99
|
rescue Timeout::Error => e
|
89
100
|
retry
|
101
|
+
rescue SOCKSError => e
|
102
|
+
retry
|
90
103
|
rescue SignalException => e
|
91
104
|
retry
|
92
105
|
rescue Net::HTTPNotFound => e
|
@@ -104,7 +117,7 @@ module TheMask
|
|
104
117
|
end
|
105
118
|
|
106
119
|
unless @min_proxy_response_time.nil? || start_time.nil? || end_time.nil?
|
107
|
-
#Remove proxy from list if response time is longer than the minimum response time provided in options
|
120
|
+
# Remove proxy from list if response time is longer than the minimum response time provided in options
|
108
121
|
response_time = end_time - start_time
|
109
122
|
@proxies.remove_proxy!(proxy) if response_time > @min_proxy_response_time
|
110
123
|
end
|
@@ -115,7 +128,6 @@ module TheMask
|
|
115
128
|
if @force
|
116
129
|
while true
|
117
130
|
data = read_proc.call
|
118
|
-
|
119
131
|
unless data.nil? || data.body.to_s.empty? || data.body.to_s.length < @min_page_length
|
120
132
|
return data.body
|
121
133
|
end
|
@@ -124,5 +136,14 @@ module TheMask
|
|
124
136
|
|
125
137
|
read_proc.call.body
|
126
138
|
end
|
139
|
+
|
140
|
+
private
|
141
|
+
def calculate_timeouts(increase_magnitude = 0)
|
142
|
+
{
|
143
|
+
timeout: (@timeout + (@timeout * increase_magnitude)),
|
144
|
+
open_timeout: (@open_timeout + (@open_timeout * increase_magnitude)),
|
145
|
+
read_timeout: (@read_timeout + (@read_timeout * increase_magnitude))
|
146
|
+
}
|
147
|
+
end
|
127
148
|
end
|
128
149
|
end
|
data/lib/the_mask/version.rb
CHANGED
data/the_mask.gemspec
CHANGED
@@ -23,5 +23,6 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency 'rake', '~> 10.0'
|
24
24
|
spec.add_development_dependency 'rspec', '~> 0'
|
25
25
|
|
26
|
-
spec.add_dependency 'mechanize', '~> 2.7'
|
26
|
+
spec.add_dependency 'mechanize', '~> 2.7.4'
|
27
|
+
spec.add_dependency 'socksify', '~> 1.7.0'
|
27
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: the_mask
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Saoud Khalifah
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,14 +58,28 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 2.7.4
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 2.7.4
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: socksify
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.7.0
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.7.0
|
69
83
|
description: TheMask provides functionality for the purposes of data mining.
|
70
84
|
email:
|
71
85
|
- saouddk@gmail.com
|
@@ -85,6 +99,7 @@ files:
|
|
85
99
|
- bin/setup
|
86
100
|
- lib/the_mask.rb
|
87
101
|
- lib/the_mask/connect.rb
|
102
|
+
- lib/the_mask/mechanize_socks_support.rb
|
88
103
|
- lib/the_mask/proxy_list.rb
|
89
104
|
- lib/the_mask/socket.rb
|
90
105
|
- lib/the_mask/user_agents.rb
|
@@ -115,3 +130,4 @@ signing_key:
|
|
115
130
|
specification_version: 4
|
116
131
|
summary: Socket obfuscation for purposes of data mining/gathering.
|
117
132
|
test_files: []
|
133
|
+
has_rdoc:
|