the_mask 0.2.0 → 0.2.1
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -7
- data/lib/the_mask/proxy_list.rb +16 -2
- data/lib/the_mask/socket.rb +19 -8
- data/lib/the_mask/version.rb +1 -1
- metadata +2 -5
- data/the_mask-0.1.0.gem +0 -0
- data/the_mask-0.1.1.gem +0 -0
- data/the_mask-0.1.2.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e23912c7fb5129bb925acbd4e9e609ebe056919
|
4
|
+
data.tar.gz: 552488893034e2a9833f794c8d09af9e9b246531
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf1ecb0d2bf67090fc760a5bb5f198127a3050e6c442c62ddb50a2c3d99c66f59abc177152d58bc082c59ead384087a3d6b620ec648bc14670b9ace58376fb63
|
7
|
+
data.tar.gz: c5459ce1dd5e26f70b3e7f7202294bf99339ec48df2d6671d7f4edb58bd101c720def4dd7d043e5f6032210e96ed4ad593fa316a6963359934c4e73738bb127e
|
data/README.md
CHANGED
@@ -29,13 +29,13 @@ Or install it yourself as:
|
|
29
29
|
This will return the body data from the supplied URL.
|
30
30
|
|
31
31
|
Available options:
|
32
|
-
`read_timeout
|
33
|
-
`open_timeout
|
34
|
-
`timeout
|
35
|
-
`max_tries
|
36
|
-
`min_page_length
|
37
|
-
`reset_ua
|
38
|
-
`force
|
32
|
+
`read_timeout = Read timeout in seconds (default: 3)`
|
33
|
+
`open_timeout = Open timeout in seconds (default: 3)`
|
34
|
+
`timeout = Timeout for whole procedure in seconds (default: 5)`
|
35
|
+
`max_tries = Maximum attempts in reading the page (default: 3)`
|
36
|
+
`min_page_length = Minimum page length in bytes, if not satisfied, reattempt retrieval (default: 100 bytes)`
|
37
|
+
`reset_ua = Reset user agent on every request. (default: true)`
|
38
|
+
`force = Force continuous opening of page until data is retrieved (default: false)`
|
39
39
|
|
40
40
|
Proxy options example:
|
41
41
|
`mask_connect = TheMask::Connect.new(proxy: { ip: '127.0.0.1', port: 8080, username: 'asd333', password: 'asd333' })`
|
data/lib/the_mask/proxy_list.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
module TheMask
|
2
2
|
class ProxyList
|
3
|
+
#TheMask::ProxyList::Proxy class
|
3
4
|
class Proxy
|
4
5
|
attr_accessor :ip, :port, :username, :password
|
5
6
|
|
@@ -20,16 +21,29 @@ module TheMask
|
|
20
21
|
end
|
21
22
|
|
22
23
|
end
|
24
|
+
#ProxyList class
|
25
|
+
attr_accessor :proxy_list
|
23
26
|
|
24
27
|
def initialize(arr = [])
|
25
28
|
@proxy_list ||= []
|
26
|
-
|
29
|
+
|
30
|
+
arr.each do |element|
|
31
|
+
@proxy_list << [TheMask::ProxyList::Proxy.new(element), 0] unless arr.empty?
|
32
|
+
end
|
27
33
|
end
|
28
34
|
|
29
35
|
def get_proxy
|
30
|
-
|
36
|
+
if @proxy_list.empty?
|
37
|
+
raise "Tried to get_proxy when proxy list is empty. Check that your input proxy list is populated."
|
38
|
+
end
|
39
|
+
|
40
|
+
@proxy_list = @proxy_list.sort_by(&:last) # Least used proxy list sort by 2nd element in inner array
|
31
41
|
@proxy_list[0] = [@proxy_list[0][0], @proxy_list[0][1] + 1]
|
32
42
|
@proxy_list[0][0]
|
33
43
|
end
|
44
|
+
|
45
|
+
def remove_proxy!(proxy)
|
46
|
+
@proxy_list.delete(proxy)
|
47
|
+
end
|
34
48
|
end
|
35
49
|
end
|
data/lib/the_mask/socket.rb
CHANGED
@@ -39,8 +39,9 @@ module TheMask
|
|
39
39
|
|
40
40
|
def open_url(url)
|
41
41
|
read_proc = Proc.new do
|
42
|
-
tries = 0
|
43
|
-
page_data = nil
|
42
|
+
tries = 0 #Total URL retrieval tries
|
43
|
+
page_data = nil #Retrieved page html data
|
44
|
+
|
44
45
|
begin
|
45
46
|
tries += 1
|
46
47
|
|
@@ -50,14 +51,24 @@ module TheMask
|
|
50
51
|
|
51
52
|
@agent.user_agent = TheMask.get_random_user_agent_str if @reset_user_agent
|
52
53
|
|
53
|
-
|
54
|
-
|
54
|
+
proxy = nil
|
55
|
+
|
56
|
+
begin
|
57
|
+
unless @proxies.nil?
|
58
|
+
begin
|
59
|
+
proxy = @proxies.get_proxy
|
55
60
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
61
|
+
if proxy.username && proxy.password
|
62
|
+
@agent.set_proxy proxy.ip, proxy.port, proxy.username, proxy.password
|
63
|
+
else
|
64
|
+
@agent.set_proxy proxy.ip, proxy.port
|
65
|
+
end
|
66
|
+
end
|
60
67
|
end
|
68
|
+
rescue Timeout::ExitException => e
|
69
|
+
#Exception timeout from mechanize
|
70
|
+
@proxies.remove_proxy!(proxy)
|
71
|
+
retry
|
61
72
|
end
|
62
73
|
|
63
74
|
Timeout::timeout(@timeout) do
|
data/lib/the_mask/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: the_mask
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Saoud Khalifah
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -89,9 +89,6 @@ files:
|
|
89
89
|
- lib/the_mask/socket.rb
|
90
90
|
- lib/the_mask/user_agents.rb
|
91
91
|
- lib/the_mask/version.rb
|
92
|
-
- the_mask-0.1.0.gem
|
93
|
-
- the_mask-0.1.1.gem
|
94
|
-
- the_mask-0.1.2.gem
|
95
92
|
- the_mask.gemspec
|
96
93
|
homepage: http://github.com/saouddk/the_mask
|
97
94
|
licenses:
|
data/the_mask-0.1.0.gem
DELETED
Binary file
|
data/the_mask-0.1.1.gem
DELETED
Binary file
|
data/the_mask-0.1.2.gem
DELETED
Binary file
|