scraper_clients 9.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +26 -0
- data/bin/pry +17 -0
- data/bin/rspec +17 -0
- data/data/user_agents.txt +204 -0
- data/lib/clients.rb +24 -0
- data/lib/clients/errors.rb +16 -0
- data/lib/clients/ftp_client.rb +17 -0
- data/lib/clients/http_client.rb +152 -0
- data/lib/clients/http_client/response.rb +57 -0
- data/lib/clients/proxy6_client.rb +70 -0
- data/lib/clients/proxy_client.rb +14 -0
- data/lib/clients/proxy_list_client.rb +38 -0
- data/lib/clients/recaptcha/client.rb +48 -0
- data/lib/clients/recaptcha/response.rb +15 -0
- data/lib/clients/recaptcha/solver.rb +115 -0
- data/lib/clients/tor_client.rb +146 -0
- data/lib/clients/url_decoder.rb +8 -0
- data/lib/clients/version.rb +3 -0
- data/spec/lib/clients/http_client/response_spec.rb +197 -0
- data/spec/lib/clients/http_client_spec.rb +221 -0
- data/spec/lib/clients/tor_client_spec.rb +34 -0
- data/spec/spec_helper.rb +66 -0
- metadata +168 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: '083902af70aedc2d96125f969723f27e07349119bc1106ca70cae69f3628cd48'
|
4
|
+
data.tar.gz: 85a342743147184e65bd732e28bbb32719e40761e733525e8c4c8bf6fcaf4217
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a40213ebfa607fcd7b90a62b14aa810a9a9b6bd702775949b49499225459250b8987550c913ec74985fafba6f17750d1c47c254b57894024d4ae088a63850abc
|
7
|
+
data.tar.gz: 2f7abfaa1aabeb52023c4a5dbcdeed5fdaabbe18749687b6767cfb6149e6803402b6c0abe1da3ab7c89f732fa6555f7ecd892bb9c6a9ab52ebaead80e5959147
|
data/README.md
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Clients
|
2
|
+
=======
|
3
|
+
|
4
|
+
Clients contains instruments that are suited to make requests during scraping.
|
5
|
+
|
6
|
+
It includes the following clients:
|
7
|
+
|
8
|
+
- **HttpClient:** to fetch web pages or files
|
9
|
+
- **FtpClient:** to fetch files from ftp
|
10
|
+
- **TorClient:** to proxy client requests via tor
|
11
|
+
- **Proxy6Client:** to proxy client requests via any of the proxy6 proxies
|
12
|
+
- **ProxyListClient:** to proxy client requests via any of the proxies listed in /tmp/clients_proxy_list.txt
|
13
|
+
- **ProxyClient:** to select a proxy client based on the CLIENTS_PROXY_CLIENT variable (e.g. `list` or `proxy6`)
|
14
|
+
|
15
|
+
It also implements a special wrapper around HttpClient:
|
16
|
+
|
17
|
+
- **Recaptcha::Client:** to visit websites behind recaptcha blocks
|
18
|
+
|
19
|
+
Important ENV variables:
|
20
|
+
|
21
|
+
- **CLIENTS_PROXY_CLIENT:** to control which proxy client will be selected by ProxyClient dispatcher (valid values: `list` or `proxy6`)
|
22
|
+
- **PROXY6_KEY:** API key for proxy6.net service
|
23
|
+
- **CAPTCHA_SOLVER_KEY:** API key for 2captcha.com service
|
24
|
+
- **TOR_PORT:** Base port for tor SOCKS5 proxy
|
25
|
+
- **TOR_CONTROL_PORT:** Base port for tor controls
|
26
|
+
- **HTTP_TOR_PORT:** Base port for http middleman proxy for TorClient (e.g. polipo)
|
data/bin/pry
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
#
|
4
|
+
# This file was generated by Bundler.
|
5
|
+
#
|
6
|
+
# The application 'pry' is installed as part of a gem, and
|
7
|
+
# this file is here to facilitate running it.
|
8
|
+
#
|
9
|
+
|
10
|
+
require "pathname"
|
11
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
12
|
+
Pathname.new(__FILE__).realpath)
|
13
|
+
|
14
|
+
require "rubygems"
|
15
|
+
require "bundler/setup"
|
16
|
+
|
17
|
+
load Gem.bin_path("pry", "pry")
|
data/bin/rspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
#
|
4
|
+
# This file was generated by Bundler.
|
5
|
+
#
|
6
|
+
# The application 'rspec' is installed as part of a gem, and
|
7
|
+
# this file is here to facilitate running it.
|
8
|
+
#
|
9
|
+
|
10
|
+
require "pathname"
|
11
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
12
|
+
Pathname.new(__FILE__).realpath)
|
13
|
+
|
14
|
+
require "rubygems"
|
15
|
+
require "bundler/setup"
|
16
|
+
|
17
|
+
load Gem.bin_path("rspec-core", "rspec")
|
@@ -0,0 +1,204 @@
|
|
1
|
+
Mozilla/5.0 (Amiga; U; AmigaOS 1.3; en; rv:1.8.1.19) Gecko/20081204 SeaMonkey/1.1.14
|
2
|
+
Mozilla/5.0 (AmigaOS; U; AmigaOS 1.3; en-US; rv:1.8.1.21) Gecko/20090303 SeaMonkey/1.1.15
|
3
|
+
Mozilla/5.0 (AmigaOS; U; AmigaOS 1.3; en; rv:1.8.1.19) Gecko/20081204 SeaMonkey/1.1.14
|
4
|
+
Mozilla/5.0 (BeOS; U; BeOS BeBox; fr; rv:1.9) Gecko/2008052906 BonEcho/2.0
|
5
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.1) Gecko/20061220 BonEcho/2.0.0.1
|
6
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.10) Gecko/20071128 BonEcho/2.0.0.10
|
7
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.17) Gecko/20080831 BonEcho/2.0.0.17
|
8
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.6) Gecko/20070731 BonEcho/2.0.0.6
|
9
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.7) Gecko/20070917 BonEcho/2.0.0.7
|
10
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1b2) Gecko/20060901 Firefox/2.0b2
|
11
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.9a1) Gecko/20051002 Firefox/1.6a1
|
12
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.9a1) Gecko/20060702 SeaMonkey/1.5a
|
13
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.10pre) Gecko/20080112 SeaMonkey/1.1.7pre
|
14
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.14) Gecko/20080429 BonEcho/2.0.0.14
|
15
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.17) Gecko/20080831 BonEcho/2.0.0.17
|
16
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.18) Gecko/20081114 BonEcho/2.0.0.18
|
17
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.21pre) Gecko/20090218 BonEcho/2.0.0.21pre
|
18
|
+
Mozilla/5.0 (Darwin; FreeBSD 5.6; en-GB; rv:1.8.1.17pre) Gecko/20080716 K-Meleon/1.5.0
|
19
|
+
Mozilla/5.0 (Darwin; FreeBSD 5.6; en-GB; rv:1.9.1b3pre)Gecko/20081211 K-Meleon/1.5.2
|
20
|
+
Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7
|
21
|
+
Mozilla/5.0 (Linux 2.4.18-18.7.x i686; U) Opera 6.03 [en]
|
22
|
+
Mozilla/5.0 (Linux 2.4.18-ltsp-1 i686; U) Opera 6.1 [en]
|
23
|
+
Mozilla/5.0 (Linux 2.4.19-16mdk i686; U) Opera 6.11 [en]
|
24
|
+
Mozilla/5.0 (Linux 2.4.21-0.13mdk i686; U) Opera 7.11 [en]
|
25
|
+
Mozilla/5.0 (Linux X86; U; Debian SID; it; rv:1.9.0.1) Gecko/2008070208 Debian IceWeasel/3.0.1
|
26
|
+
Mozilla/5.0 (Linux i686 ; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.70
|
27
|
+
Mozilla/5.0 (Linux i686; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0
|
28
|
+
Mozilla/5.0 (Linux i686; U; en; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.51
|
29
|
+
Mozilla/5.0 (Linux) Gecko Iceweasel (Debian) Mnenhy
|
30
|
+
Mozilla/5.0 (Linux; U) Opera 6.02 [en]
|
31
|
+
Mozilla/5.0 (Linux; U; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13
|
32
|
+
Mozilla/5.0 (MSIE 7.0; Macintosh; U; SunOS; X11; gu; SV1; InfoPath.2; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)
|
33
|
+
Mozilla/5.0 (Macintosh; ; Intel Mac OS X; fr; rv:1.8.1.1) Gecko/20061204 Opera
|
34
|
+
Mozilla/5.0 (Macintosh; I; Intel Mac OS X 11_7_9; de-LI; rv:1.9b4) Gecko/2012010317 Firefox/10.0a4
|
35
|
+
Mozilla/5.0 (Macintosh; I; PPC Mac OS X Mach-O; en-US; rv:1.9a1) Gecko/20061204 Firefox/3.0a1
|
36
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20110608 SeaMonkey/2.1
|
37
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b11) Gecko/20110209 Firefox/ SeaMonkey/2.1b2
|
38
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b11pre) Gecko/20110126 Firefox/4.0b11pre
|
39
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b8) Gecko/20100101 Firefox/4.0b8
|
40
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0) Gecko/20100101 Firefox/9.0
|
41
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0a2) Gecko/20111101 Firefox/9.0a2
|
42
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24
|
43
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.31 (KHTML, like Gecko) Chrome/13.0.748.0 Safari/534.31
|
44
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1
|
45
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1
|
46
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
|
47
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19
|
48
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6) AppleWebKit/531.4 (KHTML, like Gecko) Version/4.0.3 Safari/531.4
|
49
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_0) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1200.0 Iron/21.0.1200.0 Safari/537.1
|
50
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_0) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4
|
51
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1
|
52
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.32 Safari/535.1
|
53
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1
|
54
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30
|
55
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30
|
56
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11
|
57
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24
|
58
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.698.0 Safari/534.24
|
59
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.357 Chrome/11.0.696.71 Safari/534.24
|
60
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30
|
61
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24
|
62
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) Iron/11.0.700.2 Chrome/11.0.700.2 Safari/534.24
|
63
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.283 Chrome/11.0.696.65 Safari/534.24
|
64
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.292 Chrome/11.0.696.68 Safari/534.24
|
65
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.310 Chrome/11.0.696.68 Safari/534.24
|
66
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.357 Chrome/11.0.696.71 Safari/534.24
|
67
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.209 Chrome/11.0.696.71 Safari/534.24
|
68
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.423 Chrome/11.0.696.71 Safari/534.24
|
69
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.471 Chrome/11.0.696.71 Safari/534.24
|
70
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.478 Chrome/11.0.696.71 Safari/534.24
|
71
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.494 Chrome/11.0.696.71 Safari/534.24
|
72
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1
|
73
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1
|
74
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1
|
75
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1
|
76
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24
|
77
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Iron/11.0.700.2 Chrome/11.0.700.2 Safari/534.24
|
78
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.494 Chrome/11.0.696.71 Safari/534.24
|
79
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.68 Safari/534.30
|
80
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/4.0.5 Safari/531.22.7
|
81
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1
|
82
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
|
83
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Iron/17.0.1000.0 Chrome/17.0.1000.0 Safari/535.11
|
84
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.11 Safari/535.19
|
85
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19
|
86
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2
|
87
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.54 Safari/535.2
|
88
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7
|
89
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1150.1 Iron/20.0.1150.1 Safari/536.11
|
90
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2
|
91
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.0 Safari/534.24
|
92
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30
|
93
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1
|
94
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1
|
95
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11
|
96
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2
|
97
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1
|
98
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1
|
99
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11
|
100
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
|
101
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19
|
102
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24
|
103
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.7 (KHTML, like Gecko) Iron/16.0.950.0 Chrome/16.0.950.0 Safari/535.7
|
104
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10
|
105
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.1 (KHTML, like Gecko) Iron/14.0.850.0 Chrome/14.0.850.0 Safari/535.1
|
106
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
|
107
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20
|
108
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/19.0.1047.0 Safari/535.22
|
109
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.7 (KHTML, like Gecko) Iron/16.0.950.0 Chrome/16.0.950.0 Safari/535.7
|
110
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/535.19 (KHTML, like Gecko) Iron/18.0.1050.0 Chrome/18.0.1050.0 Safari/535.19
|
111
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) Iron/19.0.1100.0 Chrome/19.0.1100.0 Safari/536.5
|
112
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8) AppleWebKit/536.15 (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
113
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1150.1 Iron/20.0.1150.1 Safari/536.11
|
114
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3
|
115
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1200.0 Iron/21.0.1200.0 Safari/537.1
|
116
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.6 Safari/537.11
|
117
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X; U; en; rv:1.8.0) Gecko/20060728 Firefox/1.5.0 Opera 9.27
|
118
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X; U; nb; rv:1.7.5) Gecko/20041110
|
119
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.4; rv:10.0.2) Gecko/20120217 Firefox/10.0.2 TenFourFox/G3
|
120
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.5; rv:10.0.2) Gecko/20120216 Firefox/10.0.2 TenFourFox/7450
|
121
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/534.50.2 (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
122
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/536.15+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
123
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/536.17+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
124
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/536.25+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
125
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/537.1+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
126
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/537.3+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
127
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1
|
128
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en) Opera 8.51
|
129
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en; rv:1.8.0) Gecko/20060728 Firefox/1.5.0
|
130
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0
|
131
|
+
Mozilla/5.0 (Macintosh; U; Intel 80486Mac OS X; en-US) AppleWebKit/528.16 (KHTML, like Gecko, Safari/528.16) OmniWeb/v622.8.0.112916
|
132
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.19) Gecko/2010062819 Firefox/3.0.19 Flock/2.6.1
|
133
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.4) Gecko/2008111323 Firefox/3.0.4 Flock/2.0.2
|
134
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.5) Gecko/2008121716 Firefox/3.0.5 Flock/2.0.3
|
135
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.9) Gecko/2009042318 Firefox/3.0.9 Wyzo/3.0.2
|
136
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.9) Gecko/2009042318 Firefox/3.0.9 Wyzo/3.0.3 GTB6
|
137
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.1b3pre) Gecko/20090223 SeaMonkey/2.0a3
|
138
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.1) Gecko/2008070206
|
139
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.10) Gecko/2009122115 Firefox/3.0.17
|
140
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.3) Gecko/2008100716 Firefox/3.0.3 Flock/2.0
|
141
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.3pre) Gecko/2008090704 GranParadiso/3.0.3pre
|
142
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.4) Gecko/2008111323 Firefox/3.0.4 Flock/2.0.2
|
143
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.6) Gecko/2009011912 Safari/525.27.1
|
144
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.9) Gecko/2009042318 Firefox/3.0.9 Wyzo/3.0.3
|
145
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1a2pre) Gecko/20080826052737 Minefield/3.1a2pre
|
146
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b1pre) Gecko/20080908170408 Minefield/3.1b1pre
|
147
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20081202 SeaMonkey/2.0a2
|
148
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20090204 Firefox/3.1b3pre
|
149
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b4) Gecko/20090423 Firefox/3.5b4 GTB5
|
150
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-gb) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
151
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
152
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; es-es) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
153
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-ch) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4
|
154
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-fr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
155
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; it-it) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
156
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
157
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ko-kr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
158
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; sv-se) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
159
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
160
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1
|
161
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; en-US) AppleWebKit/531.21.8+(KHTML, like Gecko, Safari/528.16) Version/5.10.3 OmniWeb/622.14.0
|
162
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16
|
163
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16
|
164
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Iron/10.0.650.0 Chrome/10.0.650.0 Safari/534.16
|
165
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16
|
166
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.133 Chrome/10.0.648.133 Safari/534.16
|
167
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.2 (KHTML, like Gecko) Chrome/6.0.453.1 Safari/534.2
|
168
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.457.0 Safari/534.3
|
169
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.458.0 Safari/534.3
|
170
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3
|
171
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.462.0 Safari/534.3
|
172
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Iron/6.0.475.1 Chrome/6.0.475.1 Safari/534.3
|
173
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.24 Safari/534.7
|
174
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Iron/7.0.520.0 Chrome/7.0.520.0 Safari/534.7
|
175
|
+
Mozilla/5.0(Windows; U; Windows NT 5.2; rv:1.9.2) Gecko/20100101 Firefox/3.6
|
176
|
+
Mozilla/5.0(Windows; U; Windows NT 7.0; rv:1.9.2) Gecko/20100101 Firefox/3.6
|
177
|
+
Mozilla/5.0(X11;U;Linux(x86_64);en;rv:1.9a8)Gecko/2007100619;GranParadiso/3.1
|
178
|
+
Mozilla/5.001 (Macintosh; N; PPC; ja) Gecko/25250101
|
179
|
+
Mozilla/5.001 (X11; U; Linux i686; rv:1.8.1.6; de-ch) Gecko/25250101 (ubuntu-feisty)
|
180
|
+
Mozilla/6.0 (Future Star Technologies Corp. Star-Blade OS; U; en-US) iNet Browser 2.5
|
181
|
+
Mozilla/6.0 (Macintosh; I; Intel Mac OS X 11_7_9; de-LI; rv:1.9b4) Gecko/2012010317 Firefox/10.0a4
|
182
|
+
Mozilla/6.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:2.0.0.0) Gecko/20061028 Firefox/3.0
|
183
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US) Gecko/2009032609 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.7
|
184
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US) Gecko/2009032609 Chrome/2.0.172.6 Safari/530.7
|
185
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8
|
186
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8 (.NET CLR 3.5.30729)
|
187
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0
|
188
|
+
Mozilla/6.0 (Windows; U; Windows NT 7.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.9 (.NET CLR 3.5.30729)
|
189
|
+
Mozilla/6.0 (X11; U; Linux x86_64; en-US; rv:2.9.0.3) Gecko/2009022510 FreeBSD/ Sunrise/4.0.1/like Safari
|
190
|
+
Opera 9.7 (Windows NT 5.2; U; en)
|
191
|
+
Opera/10.50 (Windows NT 6.1; U; en-GB) Presto/2.2.2
|
192
|
+
Opera/10.60 (Windows NT 5.1; U; en-US) Presto/2.6.30 Version/10.60
|
193
|
+
Opera/10.60 (Windows NT 5.1; U; zh-cn) Presto/2.6.30 Version/10.60
|
194
|
+
Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01
|
195
|
+
Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10
|
196
|
+
Opera/9.80 (X11; Linux x86_64; U; de) Presto/2.2.15 Version/10.00
|
197
|
+
Opera/9.80 (X11; Linux x86_64; U; en) Presto/2.2.15 Version/10.00
|
198
|
+
Opera/9.80 (X11; Linux x86_64; U; en-GB) Presto/2.2.15 Version/10.01
|
199
|
+
Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50
|
200
|
+
Opera/9.80 (X11; Linux x86_64; U; it) Presto/2.2.15 Version/10.10
|
201
|
+
Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00
|
202
|
+
Opera/9.80 (X11; U; Linux i686; en-US; rv:1.9.2.3) Presto/2.2.15 Version/10.10
|
203
|
+
Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9
|
204
|
+
Opera/9.99 (X11; U; sk)
|
data/lib/clients.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "clients/tor_client"
|
2
|
+
require "clients/http_client"
|
3
|
+
require "clients/proxy_client"
|
4
|
+
require "clients/recaptcha/client"
|
5
|
+
|
6
|
+
# Logger is referenced below; require it here instead of relying on another
# library having loaded it first.
require "logger"

# Top-level namespace of the gem. Holds the shared logger and a convenience
# factory for a proxied HttpClient.
module Clients
  class << self
    # Lets the host application replace the default logger.
    attr_writer :logger

    # Returns the shared logger, lazily creating one that writes to
    # "log/clients.log" (a relative path, so it resolves against the current
    # working directory).
    #
    # Every line is formatted as:
    #   "<datetime> <progname> TID-<base-36 thread object id> <severity>: <msg>\n"
    def logger
      @logger ||= ::Logger.new("log/clients.log").tap do |file_logger|
        file_logger.formatter = proc do |severity, datetime, progname, msg|
          "#{datetime} #{progname} TID-#{Thread.current.object_id.to_s(36)} #{severity}: #{msg}\n"
        end
      end
    end

    # Helper to get the default setup of HttpClient with ProxyClient.
    #
    # The proxy comes from Clients::ProxyClient.from_env; +logger+ is handed
    # to the HttpClient unchanged (nil by default).
    def setup_http_client(logger: nil)
      proxy = Clients::ProxyClient.from_env
      Clients::HttpClient.new(proxy: proxy, logger: logger)
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Clients
  # Base class for every error raised by the clients in this gem.
  class ClientError < StandardError; end

  # Raised when an HTTP request fails. The message is enriched with the
  # request URL, the proxy in use and the underlying exception, when given.
  class HttpClientError < ClientError
    # rubocop:disable Style/SpecialGlobalVars

    # @param msg [String] leading part of the message
    # @param url [#to_s, nil] requested URL, appended as " at <url>"
    # @param proxy [#host, #port, nil] proxy used, appended as " via <host>:<port>"
    # @param cause [Exception, nil] error to mention in the message; defaults
    #   to `$!`, i.e. the exception being handled when this error is built
    def initialize(msg = "Failed request", url: nil, proxy: nil, cause: $!)
      # Work on a copy: appending to +msg+ itself would modify the caller's
      # string and raise FrozenError for frozen literals.
      details = msg.to_s.dup
      details << " at #{url}" if url
      details << " via #{proxy.host}:#{proxy.port}" if proxy
      details << " caused by #{cause.class}: #{cause.message}" if cause
      super(details)
    end

    # rubocop:enable Style/SpecialGlobalVars
  end

  # Raised by proxy clients.
  class ProxyClientError < ClientError; end

  # Raised by the recaptcha client.
  class RecaptchaError < ClientError; end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "net/ftp"
|
2
|
+
require "addressable/uri"
|
3
|
+
require "clients/url_decoder"
|
4
|
+
|
5
|
+
module Clients
  # Small FTP downloader built on Net::FTP.
  class FtpClient
    # Fetches the file referenced by +url+ in binary mode.
    #
    # Only the host and path of the URL are used by this method. The
    # connection is switched to passive mode and +login+ is called without
    # credentials. The path is run through UrlDecoder.decode before being
    # requested (presumably to undo percent-encoding - confirm in
    # clients/url_decoder).
    #
    # @param url [String] FTP URL of the file
    # @yield chunk_block is forwarded to Net::FTP#getbinaryfile, which is
    #   called with +nil+ as the local file name
    # @return whatever Net::FTP.open returns for the block
    def get(url, &chunk_block)
      location = Addressable::URI.parse(url)

      Net::FTP.open(location.host) do |connection|
        connection.passive = true
        connection.login
        connection.getbinaryfile(UrlDecoder.decode(location.path), nil, &chunk_block)
      end
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require "http"
|
2
|
+
require "openssl"
|
3
|
+
require "clients/http_client/response"
|
4
|
+
require "clients/errors"
|
5
|
+
|
6
|
+
module Clients
  # HTTP client used for scraping. Wraps the `http` gem and adds a randomly
  # sampled User-Agent, a cookie jar, optional proxying and request logging.
  class HttpClient
    attr_writer :user_agent, :cookies
    attr_accessor :proxy

    # Class-level pool of User-Agent strings. Kept above the `private`
    # marker on purpose: `private` does not affect singleton methods, so
    # these are public either way.
    class << self
      # Replaces the pool of User-Agent lines (handy in tests).
      attr_writer :user_agents

      # Lines of the bundled user agents file, read once and memoized.
      # Lines keep their trailing newline; callers strip them.
      def user_agents
        @user_agents ||= File.readlines(user_agents_path)
      end

      # Path of data/user_agents.txt relative to this source file.
      def user_agents_path
        File.join File.dirname(__FILE__), "../../data/user_agents.txt"
      end
    end

    # @param proxy [#host, #port, #user, #password, #reset!, nil] proxy to
    #   route requests through, or nil for direct connections
    # @param logger [#info, nil] receives one entry per request; nil
    #   disables logging
    def initialize(
      proxy: nil,
      logger: nil
    )
      @proxy = proxy
      @logger = logger
    end

    # True when a proxy is configured.
    def proxy?
      !!proxy
    end

    # True when the cookie jar holds at least one cookie.
    def has_cookies?
      cookies.any?
    end

    # Performs a GET request; see #request.
    def get(url, **options, &block)
      request :get, url, **options, &block
    end

    # Performs a POST request; see #request.
    def post(url, **options, &block)
      request :post, url, **options, &block
    end

    # Performs a HEAD request; see #request.
    def head(url, **options, &block)
      request :head, url, **options, &block
    end

    # Performs a request and wraps the result in a Response.
    #
    # @param verb [Symbol] HTTP verb
    # @param url [String] target URL
    # @param options [Hash] passed on to the `http` gem, except for
    #   :follow_redirects (default true), which is consumed here. An
    #   :ssl_context supplied by the caller is honoured; otherwise the
    #   non-verifying default from #ssl_context is used.
    # @yield [request] optional hook to customise the prepared request; the
    #   block's return value is the object the request is made with
    # @raise [HttpClientError] for any StandardError raised along the way
    def request(verb, url, **options)
      # **options is a fresh hash, so filling in the default is safe.
      options[:ssl_context] ||= ssl_context

      client = setup_request options.delete(:follow_redirects)
      client = yield client if block_given?

      response = make_request(client, verb, url, **options)

      Response.new response
    rescue StandardError
      # HttpClientError picks up the current exception through its default
      # `cause: $!` argument.
      raise HttpClientError.new(url: url, proxy: proxy)
    end

    # Resets user agent, proxy and cookies.
    def reset
      reset_user_agent
      reset_proxy
      reset_cookies
    end

    # Forgets the current User-Agent; the next read samples a new one.
    def reset_user_agent
      self.user_agent = nil
    end

    # Asks the proxy (if any) to reset itself.
    def reset_proxy
      return unless proxy

      # NOTE(review): the message is logged before reset! runs, so it shows
      # the host/port as they are prior to the reset - confirm intent.
      log "Reset proxy to #{proxy.host}:#{proxy.port}"
      proxy.reset!
    end

    # Adds each of the given cookies to the jar.
    def store_cookies(cookies)
      return if cookies.empty?
      cookies.each do |cookie|
        self.cookies << cookie
      end
    end

    # Drops the cookie jar; a new empty one is created on next access.
    def reset_cookies
      @cookies = nil
    end

    # The cookie jar, created lazily.
    def cookies
      @cookies ||= HTTP::CookieJar.new
    end

    # The User-Agent in use, sampled lazily from the class-level pool.
    def user_agent
      @user_agent ||= sample_user_agent
    end

    private

    # Builds the `http` request chain: User-Agent header, redirect
    # following (unless explicitly disabled), cookies and proxy.
    def setup_request(follow_redirects)
      follow_redirects = true if follow_redirects.nil?

      request = HTTP.headers(user_agent: user_agent)
      request = request.follow if follow_redirects
      request = request.cookies(cookies) if has_cookies?
      request = request.via(proxy.host, proxy.port, proxy.user, proxy.password) if proxy?

      request
    end

    # Executes the request and logs verb, URL, duration, status and MIME type.
    def make_request(request, verb, url, **options)
      # A monotonic clock keeps the duration correct even if the system
      # clock is adjusted while the request is in flight.
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      response = request.request(verb, url, **options)

      log_request(
        verb: verb.to_s.upcase,
        url: url,
        duration: Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at,
        status: response.status.code,
        mime_type: response.content_type.mime_type
      )

      response
    end

    # Default SSL context.
    # NOTE(security): certificate verification is disabled (VERIFY_NONE),
    # so HTTPS peers are not authenticated. Pass `ssl_context:` to #request
    # to use a verifying context.
    def ssl_context
      ctx = OpenSSL::SSL::SSLContext.new
      ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
      ctx
    end

    # Picks a random User-Agent line and strips surrounding whitespace.
    def sample_user_agent
      self.class.user_agents.sample.strip
    end

    # Logs the request details as a hash, adding a readable message and the
    # proxy's string form.
    def log_request(req)
      return unless @logger

      msg = "#{req[:verb]} #{req[:url]} (#{req[:duration]}s)"

      log req.merge(message: msg, proxy: proxy&.to_s)
    end

    # Sends a message or hash to the logger at info level, if one is set.
    def log(msg_or_hash)
      return unless @logger
      @logger.info msg_or_hash
    end
  end
end
|