tiny_grabber 0.2.8 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -0
- data/README.md +5 -0
- data/Rakefile +3 -3
- data/lib/tiny_grabber/agent.rb +283 -277
- data/lib/tiny_grabber/debug.rb +10 -56
- data/lib/tiny_grabber/http.rb +6 -8
- data/lib/tiny_grabber/version.rb +1 -1
- data/lib/tiny_grabber.rb +24 -34
- data/tiny_grabber.gemspec +3 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 96bdccdeb24ccdbbb99e4cb2e73bf1450aca4e4f
|
4
|
+
data.tar.gz: 785bed54953a6faa41325aa8d8dda56f688b638d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4256e04d8e42b404c09e32ad66bd99c4c1f38680be021fcb6e36e6e8a1e86e65be548945d853583fad0177d53e2732a275ec804079531757d477af91bd403674
|
7
|
+
data.tar.gz: 46f84bf20a8c0e4de69e3bea855c5d3a26bd66332c75fbb7e2e6ed1805364c05262e10d897f4ea458931ac842819baee55f0a3c4cd7ed731ce7207c2bdd8d149
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -126,6 +126,11 @@ response.body
|
|
126
126
|
|
127
127
|
## Changelog
|
128
128
|
|
129
|
+
* *v 0.2.9*
|
130
|
+
* Added agent attribute for redirect follow location
|
131
|
+
* Used 302 http answer code and header location for redirecting
|
132
|
+
* Used meta refresh url
|
133
|
+
* Refactored code for rubocop
|
129
134
|
* *v 0.2.8*
|
130
135
|
* Added processing Accept headers
|
131
136
|
* *v 0.2.7*
|
data/Rakefile
CHANGED
data/lib/tiny_grabber/agent.rb
CHANGED
@@ -2,334 +2,340 @@
|
|
2
2
|
# Initialize connect with Resource
|
3
3
|
# Setting connect attributes
|
4
4
|
#
|
5
|
-
class TinyGrabber
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
#
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
#
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
#
|
71
|
-
def debug= debug
|
72
|
-
debug = var_to_sym(debug, true)
|
73
|
-
if debug.is_a?(Hash)
|
74
|
-
@debug.active = debug[:active]
|
75
|
-
@debug.destination = debug[:destination]
|
76
|
-
@debug.save_html = debug[:save_html]
|
77
|
-
elsif debug.is_a?(TrueClass)
|
78
|
-
@debug.active = true
|
5
|
+
class TinyGrabber
|
6
|
+
class Agent
|
7
|
+
# Debug configuration
|
8
|
+
attr_writer :debug
|
9
|
+
# Max time to execute request
|
10
|
+
attr_writer :read_timeout
|
11
|
+
# Web browser name
|
12
|
+
attr_writer :user_agent
|
13
|
+
# Remote proxy configuration
|
14
|
+
attr_accessor :proxy
|
15
|
+
# Basic authentification configuration
|
16
|
+
attr_writer :basic_auth
|
17
|
+
# Headers
|
18
|
+
attr_writer :headers
|
19
|
+
# Headers
|
20
|
+
attr_writer :cookies
|
21
|
+
# Set verify mode
|
22
|
+
attr_writer :verify_mode
|
23
|
+
# Follow location
|
24
|
+
attr_writer :follow_location
|
25
|
+
|
26
|
+
# Agent aliases given from http://www.useragentstring.com/pages/Chrome/
|
27
|
+
AGENT_ALIASES = [
|
28
|
+
# Chrome
|
29
|
+
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
|
30
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
|
31
|
+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
|
32
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
|
33
|
+
# Firefox
|
34
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
|
35
|
+
'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
|
36
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
|
37
|
+
'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
|
38
|
+
# Internet Explorer
|
39
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
|
40
|
+
'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
|
41
|
+
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
|
42
|
+
'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
|
43
|
+
# Opera
|
44
|
+
'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
|
45
|
+
'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
|
46
|
+
'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
|
47
|
+
'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52'
|
48
|
+
].freeze
|
49
|
+
|
50
|
+
# Initialization object
|
51
|
+
#
|
52
|
+
def initialize
|
53
|
+
@debug = Debug.new
|
54
|
+
|
55
|
+
# Initialize variables agent attributes
|
56
|
+
@user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
|
57
|
+
@proxy = []
|
58
|
+
@basic_auth = {}
|
59
|
+
@headers = {}
|
60
|
+
@cookies = nil
|
61
|
+
@follow_location = false
|
62
|
+
@read_timeout = 10
|
63
|
+
# Initialize variable for URI object
|
64
|
+
@uri = nil
|
65
|
+
# Initialize variable for Net::HTTP request object
|
66
|
+
@http = Net::HTTP
|
67
|
+
# Initialize variable for Net::HTTP response object
|
68
|
+
@response = nil
|
69
|
+
@verify_mode = OpenSSL::SSL::VERIFY_NONE
|
79
70
|
end
|
80
|
-
end
|
81
|
-
|
82
71
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
#
|
97
|
-
def user_agent= user_agent
|
98
|
-
fail 'attribute user_agent must be String' unless user_agent.is_a?(String)
|
99
|
-
@user_agent = user_agent
|
100
|
-
end
|
101
|
-
|
102
|
-
|
103
|
-
# Initialize Net::HTTP connection through proxy provider
|
104
|
-
# TYPE attribute distribute proxy type on SOCKS4(5) and HTTP(s)
|
105
|
-
#
|
106
|
-
# @param proxy Proxy configuration
|
107
|
-
#
|
108
|
-
def proxy= proxy
|
109
|
-
if proxy.is_a?(String)
|
110
|
-
ip, port, type = proxy.split(':')
|
111
|
-
fail 'attribute proxy must be in format ip:port' unless ip and port
|
112
|
-
type ||= :http
|
113
|
-
proxy = { ip: ip, port: port, type: type }
|
72
|
+
# Set debug configuration
|
73
|
+
#
|
74
|
+
# @param debug
|
75
|
+
#
|
76
|
+
def debug=(debug)
|
77
|
+
debug = var_to_sym(debug, true)
|
78
|
+
if debug.is_a?(Hash)
|
79
|
+
@debug.active = debug[:active]
|
80
|
+
@debug.destination = debug[:destination]
|
81
|
+
@debug.save_html = debug[:save_html]
|
82
|
+
elsif debug.is_a?(TrueClass)
|
83
|
+
@debug.active = true
|
84
|
+
end
|
114
85
|
end
|
115
|
-
proxy = var_to_sym(proxy)
|
116
|
-
fail 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
|
117
|
-
fail 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] and proxy[:port]
|
118
86
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
87
|
+
# Set READ_TIMEOUT agent attribute
|
88
|
+
#
|
89
|
+
# @param read_timeout Waiting time to reading
|
90
|
+
#
|
91
|
+
def read_timeout=(read_timeout)
|
92
|
+
raise 'attribute read_timeout must be Integer' unless read_timeout.is_a?(Integer)
|
93
|
+
@read_timeout = read_timeout
|
124
94
|
end
|
125
|
-
end
|
126
|
-
|
127
|
-
|
128
|
-
# Set BASIC_AUTH agent attribute
|
129
|
-
#
|
130
|
-
# @param basic_auth Authentification configuration
|
131
|
-
#
|
132
|
-
def basic_auth= basic_auth
|
133
|
-
basic_auth = var_to_sym(basic_auth)
|
134
|
-
fail 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
|
135
|
-
fail 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] and basic_auth[:password]
|
136
|
-
@basic_auth = basic_auth
|
137
|
-
end
|
138
|
-
|
139
|
-
|
140
|
-
# Set HEADERS agent attribute
|
141
|
-
#
|
142
|
-
# @param headers Request headers
|
143
|
-
#
|
144
|
-
def headers= headers
|
145
|
-
fail 'attribute headers must be Hash' unless headers.is_a?(Hash)
|
146
|
-
@headers = headers
|
147
|
-
end
|
148
95
|
|
96
|
+
# Set USER_AGENT agent attribute
|
97
|
+
#
|
98
|
+
# @param user_agent Web browser name
|
99
|
+
#
|
100
|
+
def user_agent=(user_agent)
|
101
|
+
raise 'attribute user_agent must be String' unless user_agent.is_a?(String)
|
102
|
+
@user_agent = user_agent
|
103
|
+
end
|
149
104
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
105
|
+
# Initialize Net::HTTP connection through proxy provider
|
106
|
+
# TYPE attribute distribute proxy type on SOCKS4(5) and HTTP(s)
|
107
|
+
#
|
108
|
+
# @param proxy Proxy configuration
|
109
|
+
#
|
110
|
+
def proxy=(proxy)
|
111
|
+
if proxy.is_a?(String)
|
112
|
+
ip, port, type = proxy.split(':')
|
113
|
+
raise 'attribute proxy must be in format ip:port' unless ip && port
|
114
|
+
type ||= :http
|
115
|
+
proxy = { ip: ip, port: port, type: type }
|
116
|
+
end
|
117
|
+
proxy = var_to_sym(proxy)
|
118
|
+
raise 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
|
119
|
+
raise 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] && proxy[:port]
|
120
|
+
|
121
|
+
@proxy = proxy
|
122
|
+
@http = if [:socks, 'socks'].include? proxy[:type]
|
123
|
+
Net::HTTP.SOCKSProxy(proxy[:ip].to_s, proxy[:port].to_s)
|
124
|
+
else
|
125
|
+
Net::HTTP::Proxy(proxy[:ip], proxy[:port])
|
126
|
+
end
|
127
|
+
end
|
160
128
|
|
129
|
+
# Set BASIC_AUTH agent attribute
|
130
|
+
#
|
131
|
+
# @param basic_auth Authentification configuration
|
132
|
+
#
|
133
|
+
def basic_auth=(basic_auth)
|
134
|
+
basic_auth = var_to_sym(basic_auth)
|
135
|
+
raise 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
|
136
|
+
raise 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] && basic_auth[:password]
|
137
|
+
@basic_auth = basic_auth
|
138
|
+
end
|
161
139
|
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
140
|
+
# Set HEADERS agent attribute
|
141
|
+
#
|
142
|
+
# @param headers Request headers
|
143
|
+
#
|
144
|
+
def headers=(headers)
|
145
|
+
raise 'attribute headers must be Hash' unless headers.is_a?(Hash)
|
146
|
+
@headers = headers
|
147
|
+
end
|
169
148
|
|
149
|
+
# Set COOKIES agent attribute
|
150
|
+
#
|
151
|
+
# @param cookies Request cookies
|
152
|
+
#
|
153
|
+
def cookies=(cookies)
|
154
|
+
cookies = var_to_sym(cookies)
|
155
|
+
cookies = cookies.to_a.map { |x| "#{x[0]}=#{x[1]}" }.join('&') if cookies.is_a?(Hash)
|
156
|
+
raise 'attribute cookies must be String' unless cookies.is_a?(String)
|
157
|
+
@cookies = cookies
|
158
|
+
end
|
170
159
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
@
|
186
|
-
@debug.save '------------------------------'
|
160
|
+
# Set verify_mode
|
161
|
+
#
|
162
|
+
# @param verify_mode SSL verify_mode
|
163
|
+
#
|
164
|
+
# def verify_mode=(verify_mode)
|
165
|
+
# @verify_mode = verify_mode
|
166
|
+
# end
|
167
|
+
|
168
|
+
# Init follow location for redirect
|
169
|
+
#
|
170
|
+
# @param follow_location Follow location flag
|
171
|
+
#
|
172
|
+
def follow_location=(follow_location)
|
173
|
+
raise 'attribute follow_location must be Boolean' unless follow_location.is_a?(TrueClass) || follow_location.is_a?(FalseClass)
|
174
|
+
@follow_location = follow_location
|
187
175
|
end
|
188
|
-
|
189
|
-
|
176
|
+
|
177
|
+
# Fetch request for GET and POST HTTP methods
|
178
|
+
# Setting USER_AGENT, BASIC_AUTH, HEADERS, COOKIES request attribute
|
179
|
+
# Make response and save COOKIES for next requests
|
180
|
+
#
|
181
|
+
# @param url Resource link
|
182
|
+
# @param method Request method
|
183
|
+
# @param headers Request header
|
184
|
+
# @param params Request additional params
|
185
|
+
#
|
186
|
+
def fetch(url, method = :get, headers = {}, params = {})
|
187
|
+
if @debug.active
|
188
|
+
@debug.save '=============================='
|
189
|
+
@debug.save "#{method.upcase} #{url}"
|
190
|
+
@debug.save "-> [proxy] = #{@proxy}" if @proxy
|
191
|
+
@debug.save "-> [params] = #{params}"
|
192
|
+
@debug.save '------------------------------'
|
193
|
+
end
|
194
|
+
convert_to_uri url
|
195
|
+
case method
|
190
196
|
when :get
|
191
197
|
@request = Net::HTTP::Get.new(@uri.request_uri)
|
192
198
|
when :post
|
193
199
|
@request = Net::HTTP::Post.new(@uri.request_uri)
|
194
200
|
@request.set_form_data(params)
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
201
|
+
end
|
202
|
+
set_user_agent if @user_agent
|
203
|
+
set_basic_auth unless @basic_auth.empty?
|
204
|
+
@headers = headers unless headers.empty?
|
205
|
+
set_headers unless @headers.empty?
|
206
|
+
set_cookies if @cookies
|
207
|
+
@response = send_request
|
208
|
+
case @response
|
203
209
|
# HTTP response code 1xx
|
204
210
|
when Net::HTTPInformation
|
205
|
-
@debug.save
|
211
|
+
@debug.save '<- [response] = Net::HTTPInformation' if @debug.active
|
206
212
|
# HTTP response code 2xx
|
207
213
|
when Net::HTTPSuccess
|
208
214
|
save_headers if @response.header
|
209
215
|
save_cookies if @response.cookies
|
210
216
|
@debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
|
217
|
+
# Follow meta refresh
|
218
|
+
if @follow_location
|
219
|
+
refresh = @response.ng.at_css('meta[http-equiv="refresh"]')
|
220
|
+
@response = fetch refresh.attr('content').gsub(/\A.*?(http)/, 'http') if refresh
|
221
|
+
end
|
211
222
|
# HTTP response code 3xx
|
212
223
|
when Net::HTTPRedirection
|
213
224
|
@debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
|
225
|
+
@debug.save 'try curl user_agent: tg.user_agent=\'curl\''
|
226
|
+
# Follow location
|
227
|
+
@response = fetch @response.header['Location'] if @follow_location
|
214
228
|
# HTTP response code 4xx
|
215
229
|
when Net::HTTPClientError
|
216
230
|
@debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
|
217
231
|
# HTTP response code 5xx
|
218
232
|
when Net::HTTPServerError
|
219
233
|
@debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
|
234
|
+
end
|
235
|
+
@debug.save_to_file @response.body if @debug.save_html
|
236
|
+
@response
|
220
237
|
end
|
221
|
-
@debug.save_to_file @response.body if @debug.save_html
|
222
|
-
@response
|
223
|
-
end
|
224
|
-
|
225
|
-
|
226
|
-
# Initialize URI object from request url
|
227
|
-
#
|
228
|
-
# @param url Request link
|
229
|
-
#
|
230
|
-
def set_uri url
|
231
|
-
# It's magic work with escaped url
|
232
|
-
@uri = URI(URI.escape(URI.unescape(url)))
|
233
|
-
@debug.save "-> [uri] = #{@uri}" if @debug.active
|
234
|
-
end
|
235
|
-
|
236
|
-
|
237
|
-
# Set USER_AGENT request attribute
|
238
|
-
#
|
239
|
-
def set_user_agent
|
240
|
-
@headers['User-Agent'] = @user_agent
|
241
|
-
@debug.save "-> [user_agent] = #{@user_agent}" if @debug.active
|
242
|
-
end
|
243
238
|
|
239
|
+
# Initialize URI object from request url
|
240
|
+
#
|
241
|
+
# @param url Request link
|
242
|
+
#
|
243
|
+
def convert_to_uri(url)
|
244
|
+
# It's magic work with escaped url
|
245
|
+
@uri = URI(URI.escape(URI.unescape(url)))
|
246
|
+
@debug.save "-> [uri] = #{@uri}" if @debug.active
|
247
|
+
end
|
244
248
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
249
|
+
# Set USER_AGENT request attribute
|
250
|
+
#
|
251
|
+
def set_user_agent
|
252
|
+
@headers['User-Agent'] = @user_agent
|
253
|
+
@debug.save "-> [user_agent] = #{@user_agent}" if @debug.active
|
254
|
+
end
|
251
255
|
|
256
|
+
# Set BASIC_AUTH request authentification
|
257
|
+
#
|
258
|
+
def set_basic_auth
|
259
|
+
@request.basic_auth @basic_auth[:username], @basic_auth[:password]
|
260
|
+
@debug.save "-> [basic_auth] = #{@basic_auth}" if @debug.active
|
261
|
+
end
|
252
262
|
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
263
|
+
# Set request HEADERS
|
264
|
+
#
|
265
|
+
def set_headers
|
266
|
+
@headers.each do |k, v|
|
267
|
+
k = String(k)
|
268
|
+
case k
|
259
269
|
when 'Accept'
|
260
270
|
@request[k] = v
|
261
271
|
else
|
262
272
|
@request.add_field(k, v)
|
273
|
+
end
|
263
274
|
end
|
275
|
+
@debug.save "-> [headers] = #{@headers}" if @debug.active
|
264
276
|
end
|
265
|
-
@debug.save "-> [headers] = #{@headers}" if @debug.active
|
266
|
-
end
|
267
|
-
|
268
277
|
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
end
|
275
|
-
|
276
|
-
|
277
|
-
# Send request and get response
|
278
|
-
# Use SSL connect for HTTPS link scheme
|
279
|
-
#
|
280
|
-
def send_request
|
281
|
-
@http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https', verify_mode: @verify_mode, read_timeout: @read_timeout) do |http|
|
282
|
-
@debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
|
283
|
-
http.request(@request)
|
278
|
+
# Set request COOKIES
|
279
|
+
#
|
280
|
+
def set_cookies
|
281
|
+
@request['Cookie'] = @cookies
|
282
|
+
@debug.save "-> [cookies] = #{@cookies}" if @debug.active
|
284
283
|
end
|
285
|
-
end
|
286
|
-
|
287
|
-
|
288
|
-
# Save response headers in agent attribute
|
289
|
-
#
|
290
|
-
def save_headers
|
291
|
-
@headers = @response.headers
|
292
|
-
# Delete header TRANSFER_ENCODING for chain of requests
|
293
|
-
@headers.delete('transfer-encoding')
|
294
|
-
@debug.save "<- [headers] = #{@headers}" if @debug.active
|
295
|
-
end
|
296
284
|
|
285
|
+
# Send request and get response
|
286
|
+
# Use SSL connect for HTTPS link scheme
|
287
|
+
#
|
288
|
+
def send_request
|
289
|
+
@http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https', verify_mode: @verify_mode, read_timeout: @read_timeout) do |http|
|
290
|
+
@debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
|
291
|
+
http.request(@request)
|
292
|
+
end
|
293
|
+
end
|
297
294
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
295
|
+
# Save response headers in agent attribute
|
296
|
+
#
|
297
|
+
def save_headers
|
298
|
+
@headers = @response.headers
|
299
|
+
# Delete header TRANSFER_ENCODING for chain of requests
|
300
|
+
@headers.delete('transfer-encoding')
|
301
|
+
@debug.save "<- [headers] = #{@headers}" if @debug.active
|
302
|
+
end
|
304
303
|
|
304
|
+
# Save response cookies in agent attribute
|
305
|
+
#
|
306
|
+
def save_cookies
|
307
|
+
@cookies = @response.cookies
|
308
|
+
@debug.save "<- [cookies] = #{@cookies}" if @debug.active
|
309
|
+
end
|
305
310
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
311
|
+
# Clears headers and cookies
|
312
|
+
#
|
313
|
+
def reset
|
314
|
+
@headers = {}
|
315
|
+
@cookies = nil
|
316
|
+
end
|
312
317
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
318
|
+
# Convert variables and contains to symbol
|
319
|
+
#
|
320
|
+
# @param var Variable need to convert
|
321
|
+
#
|
322
|
+
def var_to_sym(var, str_to_sym = false)
|
323
|
+
if var.is_a?(Hash)
|
324
|
+
result = {}
|
325
|
+
var.each do |k, v|
|
326
|
+
result[k.to_sym] = var_to_sym(v, str_to_sym)
|
327
|
+
end
|
328
|
+
elsif var.is_a?(Array)
|
329
|
+
result = []
|
330
|
+
var.each do |v|
|
331
|
+
result << var_to_sym(v, str_to_sym)
|
332
|
+
end
|
333
|
+
elsif var.is_a?(String)
|
334
|
+
result = str_to_sym ? var.to_sym : var
|
335
|
+
else
|
336
|
+
result = var
|
327
337
|
end
|
328
|
-
|
329
|
-
result = str_to_sym ? var.to_sym : var
|
330
|
-
else
|
331
|
-
result = var
|
338
|
+
result
|
332
339
|
end
|
333
|
-
result
|
334
340
|
end
|
335
|
-
end
|
341
|
+
end
|
data/lib/tiny_grabber/debug.rb
CHANGED
@@ -15,76 +15,30 @@ class Debug
|
|
15
15
|
@save_html = false
|
16
16
|
end
|
17
17
|
|
18
|
-
|
19
|
-
# Set debug active flag
|
20
|
-
#
|
21
|
-
# @param active Flag
|
22
|
-
#
|
23
|
-
def active= active
|
24
|
-
@active = active
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
# Get debug active flag
|
29
|
-
def active
|
30
|
-
@active
|
31
|
-
end
|
32
|
-
|
33
|
-
|
34
|
-
# Set debug destination
|
35
|
-
#
|
36
|
-
# @param destination Save log to file or print
|
37
|
-
#
|
38
|
-
def destination= destination
|
39
|
-
@destination = destination
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
# Get debug destination
|
44
|
-
#
|
45
|
-
def destination
|
46
|
-
@destination
|
47
|
-
end
|
48
|
-
|
49
|
-
|
50
|
-
# Set debug flag to save response HTML to file
|
51
|
-
#
|
52
|
-
# @param save_html Flag
|
53
|
-
def save_html= save_html
|
54
|
-
@save_html = save_html
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
# Get debug flag to save response HTML to file
|
59
|
-
#
|
60
|
-
def save_html
|
61
|
-
@save_html
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
18
|
# Save log information
|
66
19
|
#
|
67
20
|
# @param message Message body
|
68
21
|
#
|
69
|
-
def save
|
22
|
+
def save(message)
|
70
23
|
message = "TG | #{Time.now.strftime('%Y%m%d-%H%M%S')} | #{message}"
|
71
24
|
case @destination
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
25
|
+
when :file
|
26
|
+
save_to_file message
|
27
|
+
when :print
|
28
|
+
p message
|
76
29
|
end
|
77
30
|
end
|
78
31
|
|
79
|
-
|
80
32
|
# Save log information to file
|
81
33
|
#
|
82
34
|
# @param message Message body
|
83
35
|
#
|
84
|
-
def save_to_file
|
36
|
+
def save_to_file(message)
|
37
|
+
# Encode message for correct Unix encoding
|
38
|
+
message = message.force_encoding('utf-8')
|
85
39
|
debug_path = "#{Dir.pwd}/log"
|
86
|
-
Dir.mkdir(debug_path,
|
40
|
+
Dir.mkdir(debug_path, 0o775) unless File.exist? debug_path
|
87
41
|
filename = "#{Time.now.strftime('%Y%m%d')}.log"
|
88
42
|
File.open("#{debug_path}/#{filename}", 'a+') { |f| f << "#{message}\r\n" }
|
89
43
|
end
|
90
|
-
end
|
44
|
+
end
|
data/lib/tiny_grabber/http.rb
CHANGED
@@ -4,29 +4,27 @@ require 'nokogiri'
|
|
4
4
|
module Net
|
5
5
|
# Success response class
|
6
6
|
class HTTPOK
|
7
|
-
|
8
7
|
# Nokogiri object of response
|
9
8
|
#
|
10
9
|
def ng
|
11
|
-
Nokogiri::HTML(
|
10
|
+
Nokogiri::HTML(body)
|
12
11
|
end
|
13
12
|
|
14
13
|
# Response Cookies
|
15
14
|
#
|
16
15
|
def cookies
|
17
|
-
cookies =
|
16
|
+
cookies = get_fields('set-cookie')
|
18
17
|
if cookies
|
19
18
|
cookies.map { |cookie| cookie.gsub(/\A([^;]+).*\Z/, '\1') }.join('&')
|
20
|
-
else
|
21
|
-
nil
|
22
19
|
end
|
23
20
|
end
|
24
21
|
|
25
|
-
|
26
22
|
# Response Headers
|
27
23
|
#
|
28
24
|
def headers
|
29
|
-
|
25
|
+
header.to_hash.each_with_object({}) do |header_key, header_value|
|
26
|
+
header_value[header_key] = header_value.first
|
27
|
+
end
|
30
28
|
end
|
31
29
|
end
|
32
|
-
end
|
30
|
+
end
|
data/lib/tiny_grabber/version.rb
CHANGED
data/lib/tiny_grabber.rb
CHANGED
@@ -10,17 +10,15 @@ require 'tiny_grabber/http'
|
|
10
10
|
# Main class for TinyGrabber
|
11
11
|
#
|
12
12
|
class TinyGrabber
|
13
|
-
|
14
13
|
# Initialize a new TinyGrabber user agent.
|
15
14
|
#
|
16
15
|
def initialize
|
17
16
|
@agent = TinyGrabber::Agent.new
|
18
17
|
end
|
19
18
|
|
20
|
-
|
21
19
|
# Singleton > Initialize a new TinyGrabber user agent.
|
22
20
|
#
|
23
|
-
def self.initialize
|
21
|
+
def self.initialize(config = {})
|
24
22
|
@agent = TinyGrabber::Agent.new
|
25
23
|
|
26
24
|
@agent.debug = config[:debug] if config[:debug]
|
@@ -32,13 +30,12 @@ class TinyGrabber
|
|
32
30
|
@agent.cookies = config[:cookies] if config[:cookies]
|
33
31
|
end
|
34
32
|
|
35
|
-
|
36
33
|
# HTTP::GET request
|
37
34
|
#
|
38
35
|
# @param url Resource link
|
39
36
|
# @param headers Request header
|
40
37
|
#
|
41
|
-
def get
|
38
|
+
def get(url, headers = {})
|
42
39
|
@agent.fetch url, :get, headers
|
43
40
|
end
|
44
41
|
|
@@ -47,129 +44,115 @@ class TinyGrabber
|
|
47
44
|
# @param url Resource link
|
48
45
|
# @param headers Request header
|
49
46
|
#
|
50
|
-
def self.get
|
51
|
-
initialize
|
47
|
+
def self.get(url, config = {})
|
48
|
+
initialize config
|
52
49
|
@agent.fetch url, :get
|
53
50
|
end
|
54
51
|
|
55
|
-
|
56
52
|
# HTTP::POST request
|
57
53
|
#
|
58
54
|
# @param url Resource link
|
59
55
|
# @param params Request post data
|
60
56
|
# @param headers Request header
|
61
57
|
#
|
62
|
-
def post
|
58
|
+
def post(url, params = {}, headers = {})
|
63
59
|
@agent.fetch url, :post, headers, params
|
64
60
|
end
|
65
61
|
|
66
|
-
|
67
62
|
# Singleton > HTTP::GET request
|
68
63
|
#
|
69
64
|
# @param url Resource link
|
70
65
|
# @param headers Request header
|
71
66
|
#
|
72
|
-
def self.post
|
73
|
-
initialize
|
67
|
+
def self.post(url, params = {}, config = {})
|
68
|
+
initialize config
|
74
69
|
@agent.fetch url, :post, {}, params
|
75
70
|
end
|
76
71
|
|
77
|
-
|
78
72
|
# Set DEBUG flag
|
79
73
|
#
|
80
74
|
# @param debug Flag to start debug
|
81
75
|
#
|
82
|
-
def debug=
|
76
|
+
def debug=(debug)
|
83
77
|
@agent.debug = debug
|
84
78
|
end
|
85
79
|
|
86
|
-
|
87
80
|
# Read READ_TIMEOUT agent attribute
|
88
81
|
#
|
89
82
|
def read_timeout
|
90
83
|
@agent.read_timeout
|
91
84
|
end
|
92
85
|
|
93
|
-
|
94
86
|
# Set READ_TIMEOUT agent attribute
|
95
87
|
#
|
96
88
|
# @param read_timeout Waiting time to reading
|
97
89
|
#
|
98
|
-
def read_timeout=
|
90
|
+
def read_timeout=(read_timeout)
|
99
91
|
@agent.read_timeout = read_timeout
|
100
92
|
end
|
101
93
|
|
102
|
-
|
103
94
|
# Read USER_AGENT agent attribute
|
104
95
|
#
|
105
96
|
def user_agent
|
106
97
|
@agent.user_agent
|
107
98
|
end
|
108
99
|
|
109
|
-
|
110
100
|
# Set USER_AGENT agent attribute
|
111
101
|
#
|
112
102
|
# @param user_agent Web browser name
|
113
103
|
#
|
114
|
-
def user_agent=
|
104
|
+
def user_agent=(user_agent)
|
115
105
|
@agent.user_agent = user_agent
|
116
106
|
end
|
117
107
|
|
118
|
-
|
119
108
|
# Read PROXY agent attribute
|
120
109
|
#
|
121
110
|
def proxy
|
122
111
|
@agent.proxy
|
123
112
|
end
|
124
113
|
|
125
|
-
|
126
114
|
# Set PROXY agent attribute
|
127
115
|
#
|
128
116
|
# @param proxy Proxy configuration
|
129
117
|
#
|
130
|
-
def proxy=
|
118
|
+
def proxy=(proxy)
|
131
119
|
@agent.proxy = proxy
|
132
120
|
end
|
133
121
|
|
134
|
-
|
135
122
|
# Set BASIC_AUTH agent attribute
|
136
123
|
#
|
137
124
|
# @param username Authentification username
|
138
125
|
# @param password Authentification password
|
139
126
|
#
|
140
|
-
def basic_auth
|
127
|
+
def basic_auth(username, password)
|
141
128
|
@agent.basic_auth = { username: username, password: password }
|
142
129
|
end
|
143
130
|
|
144
|
-
|
145
131
|
# Read HEADERS agent attribute
|
146
132
|
#
|
147
133
|
def headers
|
148
134
|
@agent.headers
|
149
135
|
end
|
150
136
|
|
151
|
-
|
152
137
|
# Set HEADERS agent attribute
|
153
138
|
#
|
154
139
|
# @param headers Request headers
|
155
140
|
#
|
156
|
-
def headers=
|
141
|
+
def headers=(headers)
|
157
142
|
@agent.headers = headers
|
158
143
|
end
|
159
144
|
|
160
|
-
|
161
145
|
# Read COOKIES agent attribute
|
162
146
|
#
|
163
147
|
def cookies
|
164
148
|
@agent.cookies
|
165
149
|
end
|
166
150
|
|
167
|
-
|
168
151
|
# Set COOKIES agent attribute
|
169
152
|
#
|
170
153
|
# @param cookies Request cookies
|
171
154
|
#
|
172
|
-
def cookies=
|
155
|
+
def cookies=(cookies)
|
173
156
|
@agent.cookies = cookies
|
174
157
|
end
|
175
158
|
|
@@ -179,12 +162,19 @@ class TinyGrabber
|
|
179
162
|
@agent.reset
|
180
163
|
end
|
181
164
|
|
182
|
-
|
183
165
|
# Set verify_mode
|
184
166
|
#
|
185
167
|
# @param verify_mode SSL verify mode
|
186
168
|
#
|
187
|
-
def verify_mode=
|
169
|
+
def verify_mode=(verify_mode)
|
188
170
|
@agent.verify_mode = verify_mode
|
189
171
|
end
|
190
|
-
|
172
|
+
|
173
|
+
# Set follow_location
|
174
|
+
#
|
175
|
+
# @param follow_location Follow location flag
|
176
|
+
#
|
177
|
+
def follow_location=(follow_location)
|
178
|
+
@agent.follow_location = follow_location
|
179
|
+
end
|
180
|
+
end
|
data/tiny_grabber.gemspec
CHANGED
@@ -6,11 +6,11 @@ require 'tiny_grabber/version'
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = 'tiny_grabber'
|
8
8
|
spec.version = TinyGrabber::VERSION
|
9
|
-
spec.authors = [
|
9
|
+
spec.authors = ['Aleksandr Chernyshev']
|
10
10
|
spec.email = ['moroznoeytpo@gmail.com']
|
11
11
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
12
|
+
spec.summary = 'Tiny grabber'
|
13
|
+
spec.description = 'Simple gem for grabbing remote web page.'
|
14
14
|
spec.homepage = 'https://github.com/moroznoeytpo/tiny_grabber'
|
15
15
|
spec.license = 'MIT'
|
16
16
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny_grabber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksandr Chernyshev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: socksify
|