the_mask 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2e23912c7fb5129bb925acbd4e9e609ebe056919
4
- data.tar.gz: 552488893034e2a9833f794c8d09af9e9b246531
3
+ metadata.gz: d67508982b071a8217ee5ca318f3ad041dc601d1
4
+ data.tar.gz: c29f73ee46620975f3671caf575a9c34726a1047
5
5
  SHA512:
6
- metadata.gz: cf1ecb0d2bf67090fc760a5bb5f198127a3050e6c442c62ddb50a2c3d99c66f59abc177152d58bc082c59ead384087a3d6b620ec648bc14670b9ace58376fb63
7
- data.tar.gz: c5459ce1dd5e26f70b3e7f7202294bf99339ec48df2d6671d7f4edb58bd101c720def4dd7d043e5f6032210e96ed4ad593fa316a6963359934c4e73738bb127e
6
+ metadata.gz: d6181d81ef0efb3c02f36f70a4731bf8f55ae62e186ba09f3cbd996300e2a288ac0af2568ae1df0746b109866dd3580fa7de09c666e3ef9a08c0a5576ebc8266
7
+ data.tar.gz: e6b3d53f7d9a8465d825e0273194ac89e55299f5443a7e33e91d674fb48fadc69c2f7f9543a7bd145a98bd739988964752c07efac847530ed8bc98693e6f2259
data/README.md CHANGED
@@ -4,6 +4,13 @@
4
4
 
5
5
  Tired of issues involved with data mining? Put on The Mask and try data mining designed for the next generation.
6
6
 
7
+ ## Features
8
+
9
+ - A powerful internal proxy list manager that prioritizes successful mining and retrieval of data.
10
+ - Full control over the data mining process through the available configuration options (see below).
11
+ - Socket obfuscation (in progress)
12
+
13
+
7
14
  ## Installation
8
15
 
9
16
  Add this line to your application's Gemfile:
@@ -12,36 +19,40 @@ Add this line to your application's Gemfile:
12
19
  gem 'the_mask'
13
20
  ```
14
21
 
15
- And then execute:
16
-
17
- $ bundle
18
-
19
22
  Or install it yourself as:
20
23
 
21
24
  $ gem install the_mask
22
25
 
23
26
  ## Usage
24
27
 
25
- `mask_connect = TheMask::Connect.new(read_timeout: 4, open_timeout: 4, max_tries: 4)
26
- mask_connect.open_url 'http://www.abcdefg.com'
27
- `
28
+ ```ruby
29
+ mask_connect = TheMask::Connect.new(read_timeout: 4, open_timeout: 4, max_tries: 4)
30
+ mask_connect.open_url('http://www.abcdefg.com')
31
+ ```
28
32
 
29
33
  This will return the body data from the supplied URL.
30
34
 
31
35
  Available options:
32
- `read_timeout = Read timeout in seconds (default: 3)`
33
- `open_timeout = Open timeout in seconds (default: 3)`
34
- `timeout = Timeout for whole procedure in seconds (default: 5)`
35
- `max_tries = Maximum attempts in reading the page (default: 3)`
36
- `min_page_length = Minimum page length in bytes, if not satisfied, reattempt retrieval (default: 100 bytes)`
37
- `reset_ua = Reset user agent on every request. (default: true)`
38
- `force = Force continuous opening of page until data is retrieved (default: false)`
36
+ ```
37
+ read_timeout = Read timeout in seconds (default: 3)
38
+ open_timeout = Open timeout in seconds (default: 3)
39
+ timeout = Timeout for whole procedure in seconds (default: 5)
40
+ max_tries = Maximum attempts in reading the page (default: 3)
41
+ min_page_length = Minimum page length in bytes, if not satisfied, reattempt retrieval (default: 100 bytes)
42
+ reset_ua = Reset user agent on every request. (default: true)
43
+ force = Force continuous opening of page until data is retrieved (default: false)
44
+ min_proxy_response_time = Response time threshold for proxies in seconds. After open_url completes, the proxy used for the request is removed from the internal proxy list if its response time exceeded this threshold (default: nil, proxies are never removed)
45
+ ```
39
46
 
40
47
  Proxy options example:
41
- `mask_connect = TheMask::Connect.new(proxy: { ip: '127.0.0.1', port: 8080, username: 'asd333', password: 'asd333' })`
48
+ ```ruby
49
+ mask_connect = TheMask::Connect.new(proxy: { ip: '127.0.0.1', port: 8080, username: 'asd333', password: 'asd333' })
50
+ ```
42
51
 
43
52
  Or supply multiple proxies with an array:
44
- `mask_connect = TheMask::Connect.new(proxies: ['111.11.1.1:80', '10.10.101.10:800', '192.10.10.1:80:sdad:asdasd'])`
53
+ ```ruby
54
+ mask_connect = TheMask::Connect.new(proxies: ['111.11.1.1:80', '10.10.101.10:800', '192.10.10.1:80:sdad:asdasd'])
55
+ ```
45
56
 
46
57
 
47
58
  ## Development
@@ -8,6 +8,7 @@ module TheMask
8
8
  MINIMUM_PAGE_LENGTH = 100 #bytes
9
9
  FORCE_READ = false
10
10
  RESET_USER_AGENT = true
11
+ MIN_PROXY_RESPONSE_TIME = nil #seconds, default: nil = do not remove proxies
11
12
 
12
13
  def initialize(options = {})
13
14
  @proxies = nil
@@ -16,6 +17,7 @@ module TheMask
16
17
  @force = options[:force] || FORCE_READ
17
18
  @min_page_length = options[:min_page_length] || MINIMUM_PAGE_LENGTH
18
19
  @reset_user_agent = options[:reset_ua] || RESET_USER_AGENT
20
+ @min_proxy_response_time = options[:min_proxy_response_time] || MIN_PROXY_RESPONSE_TIME
19
21
 
20
22
  @agent = Mechanize.new
21
23
 
@@ -39,9 +41,14 @@ module TheMask
39
41
 
40
42
  def open_url(url)
41
43
  read_proc = Proc.new do
44
+ proxy = nil #Selected proxy
42
45
  tries = 0 #Total URL retrieval tries
43
46
  page_data = nil #Retrieved page html data
44
47
 
48
+ #Variables for timing the GET request
49
+ end_time = nil
50
+ start_time = nil
51
+
45
52
  begin
46
53
  tries += 1
47
54
 
@@ -51,8 +58,6 @@ module TheMask
51
58
 
52
59
  @agent.user_agent = TheMask.get_random_user_agent_str if @reset_user_agent
53
60
 
54
- proxy = nil
55
-
56
61
  begin
57
62
  unless @proxies.nil?
58
63
  begin
@@ -72,7 +77,9 @@ module TheMask
72
77
  end
73
78
 
74
79
  Timeout::timeout(@timeout) do
80
+ start_time = Time.now
75
81
  page_data = @agent.get url
82
+ end_time = Time.now
76
83
  end
77
84
  rescue Errno::ETIMEDOUT => e
78
85
  retry
@@ -95,6 +102,13 @@ module TheMask
95
102
  rescue
96
103
  retry
97
104
  end
105
+
106
+ unless @min_proxy_response_time.nil? || start_time.nil? || end_time.nil?
107
+ #Remove proxy from list if its response time exceeds the min_proxy_response_time threshold provided in options
108
+ response_time = end_time - start_time
109
+ @proxies.remove_proxy!(proxy) if response_time > @min_proxy_response_time
110
+ end
111
+
98
112
  page_data
99
113
  end
100
114
 
@@ -1,3 +1,3 @@
1
1
  module TheMask
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: the_mask
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Saoud Khalifah
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-11-19 00:00:00.000000000 Z
11
+ date: 2015-11-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler