scraper_clients 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +26 -0
- data/bin/pry +17 -0
- data/bin/rspec +17 -0
- data/data/user_agents.txt +204 -0
- data/lib/clients.rb +24 -0
- data/lib/clients/errors.rb +16 -0
- data/lib/clients/ftp_client.rb +17 -0
- data/lib/clients/http_client.rb +152 -0
- data/lib/clients/http_client/response.rb +57 -0
- data/lib/clients/proxy6_client.rb +70 -0
- data/lib/clients/proxy_client.rb +14 -0
- data/lib/clients/proxy_list_client.rb +38 -0
- data/lib/clients/recaptcha/client.rb +48 -0
- data/lib/clients/recaptcha/response.rb +15 -0
- data/lib/clients/recaptcha/solver.rb +115 -0
- data/lib/clients/tor_client.rb +146 -0
- data/lib/clients/url_decoder.rb +8 -0
- data/lib/clients/version.rb +3 -0
- data/spec/lib/clients/http_client/response_spec.rb +197 -0
- data/spec/lib/clients/http_client_spec.rb +221 -0
- data/spec/lib/clients/tor_client_spec.rb +34 -0
- data/spec/spec_helper.rb +66 -0
- metadata +168 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: '083902af70aedc2d96125f969723f27e07349119bc1106ca70cae69f3628cd48'
|
4
|
+
data.tar.gz: 85a342743147184e65bd732e28bbb32719e40761e733525e8c4c8bf6fcaf4217
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a40213ebfa607fcd7b90a62b14aa810a9a9b6bd702775949b49499225459250b8987550c913ec74985fafba6f17750d1c47c254b57894024d4ae088a63850abc
|
7
|
+
data.tar.gz: 2f7abfaa1aabeb52023c4a5dbcdeed5fdaabbe18749687b6767cfb6149e6803402b6c0abe1da3ab7c89f732fa6555f7ecd892bb9c6a9ab52ebaead80e5959147
|
data/README.md
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Clients
|
2
|
+
=======
|
3
|
+
|
4
|
+
Clients contains instruments that are suited to make requests during scraping.
|
5
|
+
|
6
|
+
It includes the following clients:
|
7
|
+
|
8
|
+
- **HttpClient:** to fetch web pages or files
|
9
|
+
- **FtpClient:** to fetch files from ftp
|
10
|
+
- **TorClient:** to proxy client requests via tor
|
11
|
+
- **Proxy6Client:** to proxy client requests via any of the proxy6 proxies
|
12
|
+
- **ProxyListClient:** to proxy client request via any of the proxies in the list in /tmp/clients_proxy_list.txt
|
13
|
+
- **ProxyClient:** to select the proxy client based on the CLIENTS_PROXY_CLIENT variable (e.g. `list` or `proxy6`)
|
14
|
+
|
15
|
+
It also implements a special wrapper around HttpClient:
|
16
|
+
|
17
|
+
- **Recaptcha::Client:** to visit websites behind recaptcha blocks
|
18
|
+
|
19
|
+
Important ENV variables:
|
20
|
+
|
21
|
+
- **CLIENTS_PROXY_CLIENT:** to control which proxy client will be selected by ProxyClient dispatcher (valid values: `list` or `proxy6`)
|
22
|
+
- **PROXY6_KEY:** API key for proxy6.net service
|
23
|
+
- **CAPTCHA_SOLVER_KEY:** API key for 2captcha.com service
|
24
|
+
- **TOR_PORT:** Base port for tor SOCKS5 proxy
|
25
|
+
- **TOR_CONTROL_PORT:** Base port for tor controls
|
26
|
+
- **HTTP_TOR_PORT:** Base port for http middleman proxy for TorClient (e.g. polipo)
|
data/bin/pry
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Bundler-generated binstub for the 'pry' executable, which is installed as
# part of a gem. It selects the Gemfile located one directory above this
# script's directory (unless BUNDLE_GEMFILE is already set), activates the
# bundle, and then loads the gem's own executable.

require "pathname"

unless ENV["BUNDLE_GEMFILE"]
  ENV["BUNDLE_GEMFILE"] = File.expand_path("../../Gemfile", Pathname.new(__FILE__).realpath)
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("pry", "pry"))
|
data/bin/rspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Bundler-generated binstub for the 'rspec' executable, which is installed as
# part of the rspec-core gem. It selects the Gemfile located one directory
# above this script's directory (unless BUNDLE_GEMFILE is already set),
# activates the bundle, and then loads the gem's own executable.

require "pathname"

unless ENV["BUNDLE_GEMFILE"]
  ENV["BUNDLE_GEMFILE"] = File.expand_path("../../Gemfile", Pathname.new(__FILE__).realpath)
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("rspec-core", "rspec"))
|
@@ -0,0 +1,204 @@
|
|
1
|
+
Mozilla/5.0 (Amiga; U; AmigaOS 1.3; en; rv:1.8.1.19) Gecko/20081204 SeaMonkey/1.1.14
|
2
|
+
Mozilla/5.0 (AmigaOS; U; AmigaOS 1.3; en-US; rv:1.8.1.21) Gecko/20090303 SeaMonkey/1.1.15
|
3
|
+
Mozilla/5.0 (AmigaOS; U; AmigaOS 1.3; en; rv:1.8.1.19) Gecko/20081204 SeaMonkey/1.1.14
|
4
|
+
Mozilla/5.0 (BeOS; U; BeOS BeBox; fr; rv:1.9) Gecko/2008052906 BonEcho/2.0
|
5
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.1) Gecko/20061220 BonEcho/2.0.0.1
|
6
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.10) Gecko/20071128 BonEcho/2.0.0.10
|
7
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.17) Gecko/20080831 BonEcho/2.0.0.17
|
8
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.6) Gecko/20070731 BonEcho/2.0.0.6
|
9
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1.7) Gecko/20070917 BonEcho/2.0.0.7
|
10
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.8.1b2) Gecko/20060901 Firefox/2.0b2
|
11
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.9a1) Gecko/20051002 Firefox/1.6a1
|
12
|
+
Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.9a1) Gecko/20060702 SeaMonkey/1.5a
|
13
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.10pre) Gecko/20080112 SeaMonkey/1.1.7pre
|
14
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.14) Gecko/20080429 BonEcho/2.0.0.14
|
15
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.17) Gecko/20080831 BonEcho/2.0.0.17
|
16
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.18) Gecko/20081114 BonEcho/2.0.0.18
|
17
|
+
Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.21pre) Gecko/20090218 BonEcho/2.0.0.21pre
|
18
|
+
Mozilla/5.0 (Darwin; FreeBSD 5.6; en-GB; rv:1.8.1.17pre) Gecko/20080716 K-Meleon/1.5.0
|
19
|
+
Mozilla/5.0 (Darwin; FreeBSD 5.6; en-GB; rv:1.9.1b3pre)Gecko/20081211 K-Meleon/1.5.2
|
20
|
+
Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7
|
21
|
+
Mozilla/5.0 (Linux 2.4.18-18.7.x i686; U) Opera 6.03 [en]
|
22
|
+
Mozilla/5.0 (Linux 2.4.18-ltsp-1 i686; U) Opera 6.1 [en]
|
23
|
+
Mozilla/5.0 (Linux 2.4.19-16mdk i686; U) Opera 6.11 [en]
|
24
|
+
Mozilla/5.0 (Linux 2.4.21-0.13mdk i686; U) Opera 7.11 [en]
|
25
|
+
Mozilla/5.0 (Linux X86; U; Debian SID; it; rv:1.9.0.1) Gecko/2008070208 Debian IceWeasel/3.0.1
|
26
|
+
Mozilla/5.0 (Linux i686 ; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.70
|
27
|
+
Mozilla/5.0 (Linux i686; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0
|
28
|
+
Mozilla/5.0 (Linux i686; U; en; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.51
|
29
|
+
Mozilla/5.0 (Linux) Gecko Iceweasel (Debian) Mnenhy
|
30
|
+
Mozilla/5.0 (Linux; U) Opera 6.02 [en]
|
31
|
+
Mozilla/5.0 (Linux; U; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13
|
32
|
+
Mozilla/5.0 (MSIE 7.0; Macintosh; U; SunOS; X11; gu; SV1; InfoPath.2; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)
|
33
|
+
Mozilla/5.0 (Macintosh; ; Intel Mac OS X; fr; rv:1.8.1.1) Gecko/20061204 Opera
|
34
|
+
Mozilla/5.0 (Macintosh; I; Intel Mac OS X 11_7_9; de-LI; rv:1.9b4) Gecko/2012010317 Firefox/10.0a4
|
35
|
+
Mozilla/5.0 (Macintosh; I; PPC Mac OS X Mach-O; en-US; rv:1.9a1) Gecko/20061204 Firefox/3.0a1
|
36
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20110608 SeaMonkey/2.1
|
37
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b11) Gecko/20110209 Firefox/ SeaMonkey/2.1b2
|
38
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b11pre) Gecko/20110126 Firefox/4.0b11pre
|
39
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b8) Gecko/20100101 Firefox/4.0b8
|
40
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0) Gecko/20100101 Firefox/9.0
|
41
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0a2) Gecko/20111101 Firefox/9.0a2
|
42
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24
|
43
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.31 (KHTML, like Gecko) Chrome/13.0.748.0 Safari/534.31
|
44
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1
|
45
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1
|
46
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
|
47
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19
|
48
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6) AppleWebKit/531.4 (KHTML, like Gecko) Version/4.0.3 Safari/531.4
|
49
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_0) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1200.0 Iron/21.0.1200.0 Safari/537.1
|
50
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_0) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4
|
51
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1
|
52
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.32 Safari/535.1
|
53
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1
|
54
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30
|
55
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30
|
56
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11
|
57
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24
|
58
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.698.0 Safari/534.24
|
59
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.357 Chrome/11.0.696.71 Safari/534.24
|
60
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30
|
61
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24
|
62
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) Iron/11.0.700.2 Chrome/11.0.700.2 Safari/534.24
|
63
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.283 Chrome/11.0.696.65 Safari/534.24
|
64
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.292 Chrome/11.0.696.68 Safari/534.24
|
65
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.310 Chrome/11.0.696.68 Safari/534.24
|
66
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.56.357 Chrome/11.0.696.71 Safari/534.24
|
67
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.209 Chrome/11.0.696.71 Safari/534.24
|
68
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.423 Chrome/11.0.696.71 Safari/534.24
|
69
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.471 Chrome/11.0.696.71 Safari/534.24
|
70
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.478 Chrome/11.0.696.71 Safari/534.24
|
71
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.494 Chrome/11.0.696.71 Safari/534.24
|
72
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1
|
73
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1
|
74
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1
|
75
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1
|
76
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24
|
77
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Iron/11.0.700.2 Chrome/11.0.700.2 Safari/534.24
|
78
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.494 Chrome/11.0.696.71 Safari/534.24
|
79
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.68 Safari/534.30
|
80
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/4.0.5 Safari/531.22.7
|
81
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1
|
82
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
|
83
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Iron/17.0.1000.0 Chrome/17.0.1000.0 Safari/535.11
|
84
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.11 Safari/535.19
|
85
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19
|
86
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2
|
87
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.54 Safari/535.2
|
88
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7
|
89
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1150.1 Iron/20.0.1150.1 Safari/536.11
|
90
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2
|
91
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.0 Safari/534.24
|
92
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30
|
93
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1
|
94
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1
|
95
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11
|
96
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2
|
97
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1
|
98
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1
|
99
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11
|
100
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
|
101
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19
|
102
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24
|
103
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.7 (KHTML, like Gecko) Iron/16.0.950.0 Chrome/16.0.950.0 Safari/535.7
|
104
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10
|
105
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.1 (KHTML, like Gecko) Iron/14.0.850.0 Chrome/14.0.850.0 Safari/535.1
|
106
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
|
107
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20
|
108
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/19.0.1047.0 Safari/535.22
|
109
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.7 (KHTML, like Gecko) Iron/16.0.950.0 Chrome/16.0.950.0 Safari/535.7
|
110
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/535.19 (KHTML, like Gecko) Iron/18.0.1050.0 Chrome/18.0.1050.0 Safari/535.19
|
111
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) Iron/19.0.1100.0 Chrome/19.0.1100.0 Safari/536.5
|
112
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8) AppleWebKit/536.15 (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
113
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1150.1 Iron/20.0.1150.1 Safari/536.11
|
114
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3
|
115
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1200.0 Iron/21.0.1200.0 Safari/537.1
|
116
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.6 Safari/537.11
|
117
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X; U; en; rv:1.8.0) Gecko/20060728 Firefox/1.5.0 Opera 9.27
|
118
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X; U; nb; rv:1.7.5) Gecko/20041110
|
119
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.4; rv:10.0.2) Gecko/20120217 Firefox/10.0.2 TenFourFox/G3
|
120
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.5; rv:10.0.2) Gecko/20120216 Firefox/10.0.2 TenFourFox/7450
|
121
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/534.50.2 (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
122
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/536.15+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
123
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/536.17+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
124
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/536.25+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
125
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/537.1+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
126
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_8) AppleWebKit/537.3+ (KHTML, like Gecko) iCab/5.0 Safari/533.16
|
127
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1
|
128
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en) Opera 8.51
|
129
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en; rv:1.8.0) Gecko/20060728 Firefox/1.5.0
|
130
|
+
Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0
|
131
|
+
Mozilla/5.0 (Macintosh; U; Intel 80486Mac OS X; en-US) AppleWebKit/528.16 (KHTML, like Gecko, Safari/528.16) OmniWeb/v622.8.0.112916
|
132
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.19) Gecko/2010062819 Firefox/3.0.19 Flock/2.6.1
|
133
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.4) Gecko/2008111323 Firefox/3.0.4 Flock/2.0.2
|
134
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.5) Gecko/2008121716 Firefox/3.0.5 Flock/2.0.3
|
135
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.9) Gecko/2009042318 Firefox/3.0.9 Wyzo/3.0.2
|
136
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.9) Gecko/2009042318 Firefox/3.0.9 Wyzo/3.0.3 GTB6
|
137
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.1b3pre) Gecko/20090223 SeaMonkey/2.0a3
|
138
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.1) Gecko/2008070206
|
139
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.10) Gecko/2009122115 Firefox/3.0.17
|
140
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.3) Gecko/2008100716 Firefox/3.0.3 Flock/2.0
|
141
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.3pre) Gecko/2008090704 GranParadiso/3.0.3pre
|
142
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.4) Gecko/2008111323 Firefox/3.0.4 Flock/2.0.2
|
143
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.6) Gecko/2009011912 Safari/525.27.1
|
144
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.9) Gecko/2009042318 Firefox/3.0.9 Wyzo/3.0.3
|
145
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1a2pre) Gecko/20080826052737 Minefield/3.1a2pre
|
146
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b1pre) Gecko/20080908170408 Minefield/3.1b1pre
|
147
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20081202 SeaMonkey/2.0a2
|
148
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20090204 Firefox/3.1b3pre
|
149
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b4) Gecko/20090423 Firefox/3.5b4 GTB5
|
150
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-gb) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
151
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
152
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; es-es) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
153
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-ch) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4
|
154
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-fr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
155
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; it-it) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
156
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
157
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ko-kr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
158
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; sv-se) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
159
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27
|
160
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1
|
161
|
+
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; en-US) AppleWebKit/531.21.8+(KHTML, like Gecko, Safari/528.16) Version/5.10.3 OmniWeb/622.14.0
|
162
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16
|
163
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16
|
164
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Iron/10.0.650.0 Chrome/10.0.650.0 Safari/534.16
|
165
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16
|
166
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.133 Chrome/10.0.648.133 Safari/534.16
|
167
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.2 (KHTML, like Gecko) Chrome/6.0.453.1 Safari/534.2
|
168
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.457.0 Safari/534.3
|
169
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.458.0 Safari/534.3
|
170
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3
|
171
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.462.0 Safari/534.3
|
172
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Iron/6.0.475.1 Chrome/6.0.475.1 Safari/534.3
|
173
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.24 Safari/534.7
|
174
|
+
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Iron/7.0.520.0 Chrome/7.0.520.0 Safari/534.7
|
175
|
+
Mozilla/5.0(Windows; U; Windows NT 5.2; rv:1.9.2) Gecko/20100101 Firefox/3.6
|
176
|
+
Mozilla/5.0(Windows; U; Windows NT 7.0; rv:1.9.2) Gecko/20100101 Firefox/3.6
|
177
|
+
Mozilla/5.0(X11;U;Linux(x86_64);en;rv:1.9a8)Gecko/2007100619;GranParadiso/3.1
|
178
|
+
Mozilla/5.001 (Macintosh; N; PPC; ja) Gecko/25250101
|
179
|
+
Mozilla/5.001 (X11; U; Linux i686; rv:1.8.1.6; de-ch) Gecko/25250101 (ubuntu-feisty)
|
180
|
+
Mozilla/6.0 (Future Star Technologies Corp. Star-Blade OS; U; en-US) iNet Browser 2.5
|
181
|
+
Mozilla/6.0 (Macintosh; I; Intel Mac OS X 11_7_9; de-LI; rv:1.9b4) Gecko/2012010317 Firefox/10.0a4
|
182
|
+
Mozilla/6.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:2.0.0.0) Gecko/20061028 Firefox/3.0
|
183
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US) Gecko/2009032609 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.7
|
184
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US) Gecko/2009032609 Chrome/2.0.172.6 Safari/530.7
|
185
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8
|
186
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8 (.NET CLR 3.5.30729)
|
187
|
+
Mozilla/6.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0
|
188
|
+
Mozilla/6.0 (Windows; U; Windows NT 7.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.9 (.NET CLR 3.5.30729)
|
189
|
+
Mozilla/6.0 (X11; U; Linux x86_64; en-US; rv:2.9.0.3) Gecko/2009022510 FreeBSD/ Sunrise/4.0.1/like Safari
|
190
|
+
Opera 9.7 (Windows NT 5.2; U; en)
|
191
|
+
Opera/10.50 (Windows NT 6.1; U; en-GB) Presto/2.2.2
|
192
|
+
Opera/10.60 (Windows NT 5.1; U; en-US) Presto/2.6.30 Version/10.60
|
193
|
+
Opera/10.60 (Windows NT 5.1; U; zh-cn) Presto/2.6.30 Version/10.60
|
194
|
+
Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01
|
195
|
+
Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10
|
196
|
+
Opera/9.80 (X11; Linux x86_64; U; de) Presto/2.2.15 Version/10.00
|
197
|
+
Opera/9.80 (X11; Linux x86_64; U; en) Presto/2.2.15 Version/10.00
|
198
|
+
Opera/9.80 (X11; Linux x86_64; U; en-GB) Presto/2.2.15 Version/10.01
|
199
|
+
Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50
|
200
|
+
Opera/9.80 (X11; Linux x86_64; U; it) Presto/2.2.15 Version/10.10
|
201
|
+
Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00
|
202
|
+
Opera/9.80 (X11; U; Linux i686; en-US; rv:1.9.2.3) Presto/2.2.15 Version/10.10
|
203
|
+
Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9
|
204
|
+
Opera/9.99 (X11; U; sk)
|
data/lib/clients.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "clients/tor_client"
|
2
|
+
require "clients/http_client"
|
3
|
+
require "clients/proxy_client"
|
4
|
+
require "clients/recaptcha/client"
|
5
|
+
|
6
|
+
module Clients
  class << self
    # Lets the host application replace the shared logger.
    attr_writer :logger

    # Returns the logger shared by the clients.
    #
    # When no logger has been assigned, a default one is built on first use
    # and memoized. The default logger writes to "log/clients.log" (relative
    # to the current working directory) and prefixes each line with the
    # timestamp, program name, a per-thread id and the severity.
    #
    # @return [Logger, Object] the assigned logger or the default file logger
    def logger
      @logger ||= build_default_logger
    end

    # Helper to get default setup of HttpClient with ProxyClient
    #
    # @param logger [Object, nil] logger handed to the HttpClient (may be nil)
    # @return [Clients::HttpClient] client wired to the proxy chosen by
    #   Clients::ProxyClient.from_env
    def setup_http_client(logger: nil)
      proxy = Clients::ProxyClient.from_env
      Clients::HttpClient.new(proxy: proxy, logger: logger)
    end

    private

    # Builds the default file logger.
    #
    # @return [Logger]
    def build_default_logger
      # Deliberately lazy: these libraries are only needed when the caller did
      # not inject a logger of its own.
      require "logger"
      require "fileutils"

      path = "log/clients.log"
      # Logger.new raises Errno::ENOENT if the directory is missing, so make
      # sure it exists before opening the file.
      FileUtils.mkdir_p(File.dirname(path))

      ::Logger.new(path).tap do |default_logger|
        default_logger.formatter = proc do |severity, datetime, progname, msg|
          "#{datetime} #{progname} TID-#{Thread.current.object_id.to_s(36)} #{severity}: #{msg}\n"
        end
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Clients
  # Base class for all errors raised by the clients.
  class ClientError < StandardError; end

  # Raised when an HTTP request fails. The message is enriched with the
  # request URL, the proxy endpoint and the underlying exception when known.
  class HttpClientError < ClientError
    # rubocop:disable Style/SpecialGlobalVars

    # @param msg [String] base message
    # @param url [#to_s, nil] URL of the failed request, appended when given
    # @param proxy [#host, #port, nil] proxy in use, appended as host:port when given
    # @param cause [Exception, nil] underlying error; defaults to the exception
    #   currently being handled ($!), so raising from a rescue block records it
    def initialize(msg = "Failed request", url: nil, proxy: nil, cause: $!)
      # Work on a copy: appending to +msg+ itself would mutate the caller's
      # string and raise FrozenError for frozen strings.
      details = msg.dup
      details << " at #{url}" if url
      details << " via #{proxy.host}:#{proxy.port}" if proxy
      details << " caused by #{cause.class}: #{cause.message}" if cause
      super(details)
    end

    # rubocop:enable Style/SpecialGlobalVars
  end

  # Error class for proxy client failures.
  class ProxyClientError < ClientError; end

  # Error class for recaptcha failures.
  class RecaptchaError < ClientError; end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "net/ftp"
|
2
|
+
require "addressable/uri"
|
3
|
+
require "clients/url_decoder"
|
4
|
+
|
5
|
+
module Clients
  # Small FTP downloader used to fetch files during scraping.
  class FtpClient
    # Downloads the file addressed by +url+ in binary mode.
    #
    # Connects to the URL's host in passive mode, logs in without explicit
    # credentials and passes the decoded URL path to Net::FTP#getbinaryfile.
    # No local file name is given (nil); the given block is forwarded so the
    # caller can consume the transfer.
    #
    # @param url [String] FTP URL of the file to fetch
    # @yield forwarded to Net::FTP#getbinaryfile
    def get(url, &chunk_block)
      location = Addressable::URI.parse(url)

      Net::FTP.open(location.host) do |connection|
        connection.passive = true
        connection.login

        remote_path = UrlDecoder.decode(location.path)
        connection.getbinaryfile(remote_path, nil, &chunk_block)
      end
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require "http"
|
2
|
+
require "openssl"
|
3
|
+
require "clients/http_client/response"
|
4
|
+
require "clients/errors"
|
5
|
+
|
6
|
+
module Clients
  # HTTP client used for scraping. It sends a User-Agent sampled from the
  # bundled list, keeps a cookie jar, can route requests through a proxy
  # object, and wraps every failure in HttpClientError.
  class HttpClient
    class << self
      # Allows overriding the pool of user agents (e.g. in tests).
      attr_writer :user_agents

      # Lines of the bundled user agents file, read once and memoized.
      # Entries keep their trailing newline; callers strip them.
      #
      # @return [Array<String>]
      def user_agents
        @user_agents ||= File.readlines user_agents_path
      end

      # @return [String] path to data/user_agents.txt relative to this file
      def user_agents_path
        File.join File.dirname(__FILE__), "../../data/user_agents.txt"
      end
    end

    attr_writer :user_agent, :cookies
    attr_accessor :proxy

    # @param proxy [Object, nil] proxy responding to host, port, user,
    #   password and reset!; nil sends requests directly
    # @param logger [Object, nil] object responding to #info; nil disables logging
    def initialize(
      proxy: nil,
      logger: nil
    )
      @proxy = proxy
      @logger = logger
    end

    # @return [Boolean] whether a proxy is configured
    def proxy?
      !!proxy
    end

    # @return [Boolean] whether the cookie jar holds any cookie
    def has_cookies?
      cookies.any?
    end

    # Performs a GET request; see #request.
    def get(url, **options, &block)
      request(:get, url, **options, &block)
    end

    # Performs a POST request; see #request.
    def post(url, **options, &block)
      request(:post, url, **options, &block)
    end

    # Performs a HEAD request; see #request.
    def head(url, **options, &block)
      request(:head, url, **options, &block)
    end

    # Performs a request and wraps the result in Response.
    #
    # @param verb [Symbol] HTTP verb
    # @param url [String] target URL
    # @param options [Hash] passed to the underlying request;
    #   :follow_redirects (default true) is consumed here, and :ssl_context
    #   is always replaced by this client's own context
    # @yield [request] optional hook receiving the prepared request; its
    #   return value is used as the request to perform
    # @return [Response]
    # @raise [HttpClientError] on any StandardError raised while preparing or
    #   performing the request
    def request(verb, url, **options)
      options = options.merge(ssl_context: ssl_context)

      request = setup_request(options.delete(:follow_redirects))
      request = yield request if block_given?

      response = make_request(request, verb, url, **options)

      Response.new(response)
    rescue StandardError
      # HttpClientError picks up the rescued exception through its $! default.
      raise HttpClientError.new(url: url, proxy: proxy)
    end

    # Resets user agent, proxy and cookies.
    def reset
      reset_user_agent
      reset_proxy
      reset_cookies
    end

    # Forgets the current user agent; a new one is sampled on next use.
    def reset_user_agent
      self.user_agent = nil
    end

    # Asks the proxy (if any) to reset itself.
    def reset_proxy
      return unless proxy

      # NOTE(review): host/port are logged before reset! runs, so if reset!
      # changes the endpoint the message shows the previous one - confirm intent.
      log "Reset proxy to #{proxy.host}:#{proxy.port}"
      proxy.reset!
    end

    # Adds each of the given cookies to the cookie jar.
    #
    # @param cookies [#empty?, #each] cookies to store
    def store_cookies(cookies)
      return if cookies.empty?

      # The parameter shadows the reader, hence the explicit self.
      cookies.each do |cookie|
        self.cookies << cookie
      end
    end

    # Drops the cookie jar; a fresh one is created on next access.
    def reset_cookies
      @cookies = nil
    end

    # @return [HTTP::CookieJar] the assigned jar, or a new empty one
    def cookies
      @cookies ||= HTTP::CookieJar.new
    end

    # @return [String] the assigned user agent, or one sampled from the list
    def user_agent
      @user_agent ||= sample_user_agent
    end

    private

    # Builds the request chain: User-Agent header, optional redirect
    # following, cookies (when present) and proxy (when configured).
    def setup_request(follow_redirects)
      follow_redirects = true if follow_redirects.nil?

      request = HTTP.headers(user_agent: user_agent)
      request = request.follow if follow_redirects
      request = request.cookies(cookies) if has_cookies?
      request = request.via(proxy.host, proxy.port, proxy.user, proxy.password) if proxy?

      request
    end

    # Performs the request, logs verb, url, duration, status and mime type,
    # and returns the raw response.
    def make_request(request, verb, url, **options)
      started_at = monotonic_now

      response = request.request(verb, url, **options)

      log_request(
        verb: verb.to_s.upcase,
        url: url,
        duration: monotonic_now - started_at,
        status: response.status.code,
        mime_type: response.content_type.mime_type
      )

      response
    end

    # Seconds from the monotonic clock, so measured durations are not skewed
    # by wall-clock adjustments.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # SECURITY: certificate verification is disabled (VERIFY_NONE), so TLS
    # connections made with this context are not authenticated.
    def ssl_context
      ctx = OpenSSL::SSL::SSLContext.new
      ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
      ctx
    end

    # Picks a random entry from the class-level list, without surrounding
    # whitespace.
    def sample_user_agent
      self.class.user_agents.sample.strip
    end

    # Logs the request attributes as a hash, adding a readable message and
    # the proxy's string form.
    def log_request(req)
      return unless @logger

      msg = "#{req[:verb]} #{req[:url]} (#{req[:duration]}s)"

      log req.merge(message: msg, proxy: proxy&.to_s)
    end

    # Sends a message or hash to the logger at info level; no-op without a logger.
    def log(msg_or_hash)
      return unless @logger

      @logger.info msg_or_hash
    end
  end
end
|