tiny_grabber 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -10
- data/lib/tiny_grabber/agent.rb +69 -31
- data/lib/tiny_grabber/debug.rb +64 -4
- data/lib/tiny_grabber/version.rb +1 -1
- data/lib/tiny_grabber.rb +1 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8385d450b76a0d89ffa5586ee9f9a684f56bce00
|
4
|
+
data.tar.gz: a6691e3ad022cf3ad32d1924ce947b28786d8bda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72639e18d054c04ac01549d4cc044395b44dff3267ea51408d77488ec56b9130ea104d796b07c73252ed2b40bbbfa1222ac6fe64a8e90e3f728f6f65ee33fef6
|
7
|
+
data.tar.gz: 967877a786a60e4204cdf8066e7ab27a997449f7ac8a9fa73e74ce2ca50cecc820df586e70205e548747305d7f05476936d6149466544b28165660591270f969
|
data/README.md
CHANGED
@@ -32,31 +32,39 @@ require 'tiny_grabber'
|
|
32
32
|
|
33
33
|
# Set request timeout
|
34
34
|
read_timeout = 300
|
35
|
+
|
35
36
|
# You can set your own User-Agent; by default each request gets a random User-Agent from a list of the most popular ones
|
36
37
|
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36'
|
38
|
+
|
37
39
|
# Set a proxy to conceal your real IP
|
38
|
-
|
40
|
+
# ip (required argument) - String in the format [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+
|
41
|
+
# port(required argument) - Integer
|
42
|
+
# type - Connect type `http` or `socks`
|
43
|
+
proxy = { ip: 'xx.xx.xx.xx', port: 'xxxx', type: '...' }
|
44
|
+
|
39
45
|
# Set Net::HTTP headers
|
40
46
|
headers = { 'Content-Type' => 'text/html; charset=utf-8' }
|
41
|
-
|
47
|
+
|
48
|
+
# You can set your own cookies as a String or a Hash
|
42
49
|
cookies = 'username=username&password=password'
|
43
|
-
# or Hash
|
44
50
|
cookies = { username: 'username', password: 'password' }
|
51
|
+
|
45
52
|
# For a POST request you can set the data to send
|
46
53
|
params = { key: 'value' }
|
47
54
|
|
48
55
|
# Initialize TinyGrabber object
|
49
56
|
tg = TinyGrabber.new
|
50
57
|
|
51
|
-
# Set debug flag for view log information
|
52
|
-
tg.debug = true
|
53
58
|
|
54
|
-
#
|
55
|
-
# active -
|
56
|
-
# destination -
|
57
|
-
# save_html -
|
59
|
+
# Set debug configuration
|
60
|
+
# active - Flag to save log information
|
61
|
+
# destination - Save log to file or print: [:file, :print]
|
62
|
+
# save_html - Flag to save response html to file
|
58
63
|
tg.debug = { active: true, destination: :file, save_html: true }
|
59
64
|
|
65
|
+
# Set the debug flag to activate debugging with the default configuration { active: true, destination: :print, save_html: false }
|
66
|
+
tg.debug = true
|
67
|
+
|
60
68
|
# Set max time to execute request
|
61
69
|
tg.read_timeout = read_timeout
|
62
70
|
|
@@ -114,8 +122,11 @@ response.body
|
|
114
122
|
|
115
123
|
## Changelog
|
116
124
|
|
125
|
+
* *v 0.2.5*
|
126
|
+
* Added automatic conversion of params keys to symbols
|
127
|
+
Now you can set cookies with hash `cookies = { "username" => 'username', "password" => 'password' }`
|
117
128
|
* *v 0.2.4*
|
118
|
-
*
|
129
|
+
* Added debug file
|
119
130
|
* *v 0.2.3*
|
120
131
|
* The feature to set cookies in the form of a Hash is added
|
121
132
|
* *v 0.2.2*
|
data/lib/tiny_grabber/agent.rb
CHANGED
@@ -3,12 +3,8 @@
|
|
3
3
|
# Setting connect attributes
|
4
4
|
#
|
5
5
|
class TinyGrabber::Agent
|
6
|
-
# Debug
|
6
|
+
# Debug configuration
|
7
7
|
attr_accessor :debug
|
8
|
-
# Debug destination type
|
9
|
-
attr_accessor :debug_destination
|
10
|
-
# Debug flag for save html in file
|
11
|
-
attr_accessor :debug_save_html
|
12
8
|
# Max time to execute request
|
13
9
|
attr_accessor :read_timeout
|
14
10
|
# Web browser name
|
@@ -49,9 +45,7 @@ class TinyGrabber::Agent
|
|
49
45
|
# Initialization object
|
50
46
|
#
|
51
47
|
def initialize
|
52
|
-
@debug =
|
53
|
-
@debug_destination = :file
|
54
|
-
@debug_save_html = false
|
48
|
+
@debug = Debug.new
|
55
49
|
|
56
50
|
# Initialize variables agent attributes
|
57
51
|
@user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
|
@@ -69,6 +63,22 @@ class TinyGrabber::Agent
|
|
69
63
|
end
|
70
64
|
|
71
65
|
|
66
|
+
# Set debug configuration
|
67
|
+
#
|
68
|
+
# @param debug
|
69
|
+
#
|
70
|
+
def debug= debug
|
71
|
+
debug = var_to_sym(debug, true)
|
72
|
+
if debug.is_a?(Hash)
|
73
|
+
@debug.active = debug[:active]
|
74
|
+
@debug.destination = debug[:destination]
|
75
|
+
@debug.save_html = debug[:save_html]
|
76
|
+
elsif debug.is_a?(TrueClass)
|
77
|
+
@debug.active = true
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
|
72
82
|
# Set READ_TIMEOUT agent attribute
|
73
83
|
#
|
74
84
|
# @param read_timeout Waiting time to reading
|
@@ -96,16 +106,18 @@ class TinyGrabber::Agent
|
|
96
106
|
#
|
97
107
|
def proxy= proxy
|
98
108
|
if proxy.is_a?(String)
|
99
|
-
ip, port = proxy.split(':')
|
109
|
+
ip, port, type = proxy.split(':')
|
100
110
|
fail 'attribute proxy must be in format ip:port' unless ip and port
|
101
|
-
|
111
|
+
type ||= :http
|
112
|
+
proxy = { ip: ip, port: port, type: type }
|
102
113
|
end
|
114
|
+
proxy = var_to_sym(proxy)
|
103
115
|
fail 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
|
104
116
|
fail 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] and proxy[:port]
|
105
117
|
|
106
118
|
@proxy = proxy
|
107
|
-
if [
|
108
|
-
@http = Net::HTTP.SOCKSProxy(proxy[:ip], proxy[:port])
|
119
|
+
if [:socks, 'socks'].include? proxy[:type]
|
120
|
+
@http = Net::HTTP.SOCKSProxy(proxy[:ip].to_s, proxy[:port].to_s)
|
109
121
|
else
|
110
122
|
@http = Net::HTTP::Proxy(proxy[:ip], proxy[:port])
|
111
123
|
end
|
@@ -117,6 +129,7 @@ class TinyGrabber::Agent
|
|
117
129
|
# @param basic_auth Authentication configuration
|
118
130
|
#
|
119
131
|
def basic_auth= basic_auth
|
132
|
+
basic_auth = var_to_sym(basic_auth)
|
120
133
|
fail 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
|
121
134
|
fail 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] and basic_auth[:password]
|
122
135
|
@basic_auth = basic_auth
|
@@ -138,6 +151,7 @@ class TinyGrabber::Agent
|
|
138
151
|
# @param cookies Request cookies
|
139
152
|
#
|
140
153
|
def cookies= cookies
|
154
|
+
cookies = var_to_sym(cookies)
|
141
155
|
cookies = cookies.to_a.map { |x| "#{x[0]}=#{x[1]}" }.join('&') if cookies.is_a?(Hash)
|
142
156
|
fail 'attribute cookies must be String' unless cookies.is_a?(String)
|
143
157
|
@cookies = cookies
|
@@ -154,11 +168,12 @@ class TinyGrabber::Agent
|
|
154
168
|
# @param params Request additional params
|
155
169
|
#
|
156
170
|
def fetch url, method = :get, headers = {}, params = {}
|
157
|
-
if @debug
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
171
|
+
if @debug.active
|
172
|
+
@debug.save '=============================='
|
173
|
+
@debug.save "#{method.upcase} #{url}"
|
174
|
+
@debug.save "-> [proxy] = #{@proxy}" if @proxy
|
175
|
+
@debug.save "-> [params] = #{params}"
|
176
|
+
@debug.save '------------------------------'
|
162
177
|
end
|
163
178
|
set_uri url
|
164
179
|
case method
|
@@ -177,23 +192,23 @@ class TinyGrabber::Agent
|
|
177
192
|
case @response
|
178
193
|
# HTTP response code 1xx
|
179
194
|
when Net::HTTPInformation
|
180
|
-
|
195
|
+
@debug.save "<- [response] = Net::HTTPInformation" if @debug.active
|
181
196
|
# HTTP response code 2xx
|
182
197
|
when Net::HTTPSuccess
|
183
198
|
save_headers if @response.header
|
184
199
|
save_cookies if @response.cookies
|
185
|
-
|
200
|
+
@debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
|
186
201
|
# HTTP response code 3xx
|
187
202
|
when Net::HTTPRedirection
|
188
|
-
|
203
|
+
@debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
|
189
204
|
# HTTP response code 4xx
|
190
205
|
when Net::HTTPClientError
|
191
|
-
|
206
|
+
@debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
|
192
207
|
# HTTP response code 5xx
|
193
208
|
when Net::HTTPServerError
|
194
|
-
|
209
|
+
@debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
|
195
210
|
end
|
196
|
-
|
211
|
+
@debug.save_to_file @response.body if @debug.save_html
|
197
212
|
@response
|
198
213
|
end
|
199
214
|
|
@@ -205,7 +220,7 @@ class TinyGrabber::Agent
|
|
205
220
|
def set_uri url
|
206
221
|
# It's magic work with escaped url
|
207
222
|
@uri = URI(URI.escape(URI.unescape(url)))
|
208
|
-
|
223
|
+
@debug.save "-> [uri] = #{@uri}" if @debug.active
|
209
224
|
end
|
210
225
|
|
211
226
|
|
@@ -213,7 +228,7 @@ class TinyGrabber::Agent
|
|
213
228
|
#
|
214
229
|
def set_user_agent
|
215
230
|
@headers['User-Agent'] = @user_agent
|
216
|
-
|
231
|
+
@debug.save "-> [user_agent] = #{@user_agent}" if @debug.active
|
217
232
|
end
|
218
233
|
|
219
234
|
|
@@ -221,7 +236,7 @@ class TinyGrabber::Agent
|
|
221
236
|
#
|
222
237
|
def set_basic_auth
|
223
238
|
@request.basic_auth @basic_auth[:username], @basic_auth[:password]
|
224
|
-
|
239
|
+
@debug.save "-> [basic_auth] = #{@basic_auth}" if @debug.active
|
225
240
|
end
|
226
241
|
|
227
242
|
|
@@ -229,7 +244,7 @@ class TinyGrabber::Agent
|
|
229
244
|
#
|
230
245
|
def set_headers
|
231
246
|
@headers.each { |k, v| @request.add_field(String(k), v) }
|
232
|
-
|
247
|
+
@debug.save "-> [headers] = #{@headers}" if @debug.active
|
233
248
|
end
|
234
249
|
|
235
250
|
|
@@ -237,7 +252,7 @@ class TinyGrabber::Agent
|
|
237
252
|
#
|
238
253
|
def set_cookies
|
239
254
|
@request['Cookie'] = @cookies
|
240
|
-
|
255
|
+
@debug.save "-> [cookies] = #{@cookies}" if @debug.active
|
241
256
|
end
|
242
257
|
|
243
258
|
|
@@ -247,7 +262,7 @@ class TinyGrabber::Agent
|
|
247
262
|
def send_request
|
248
263
|
@http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https') do |http|
|
249
264
|
http.read_timeout = @read_timeout
|
250
|
-
|
265
|
+
@debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
|
251
266
|
http.request(@request)
|
252
267
|
end
|
253
268
|
end
|
@@ -259,7 +274,7 @@ class TinyGrabber::Agent
|
|
259
274
|
@headers = @response.headers
|
260
275
|
# Delete header TRANSFER_ENCODING for chain of requests
|
261
276
|
@headers.delete('transfer-encoding')
|
262
|
-
|
277
|
+
@debug.save "<- [headers] = #{@headers}" if @debug.active
|
263
278
|
end
|
264
279
|
|
265
280
|
|
@@ -267,7 +282,7 @@ class TinyGrabber::Agent
|
|
267
282
|
#
|
268
283
|
def save_cookies
|
269
284
|
@cookies = @response.cookies
|
270
|
-
|
285
|
+
@debug.save "<- [cookies] = #{@cookies}" if @debug.active
|
271
286
|
end
|
272
287
|
|
273
288
|
|
@@ -277,4 +292,27 @@ class TinyGrabber::Agent
|
|
277
292
|
@headers = {}
|
278
293
|
@cookies = nil
|
279
294
|
end
|
295
|
+
|
296
|
+
# Recursively convert Hash keys (and, when str_to_sym is set, String values) to symbols
|
297
|
+
#
|
298
|
+
# @param var Variable need to convert
|
299
|
+
#
|
300
|
+
def var_to_sym var, str_to_sym = false
|
301
|
+
if var.is_a?(Hash)
|
302
|
+
result = {}
|
303
|
+
var.each do |k, v|
|
304
|
+
result[k.to_sym] = var_to_sym(v, str_to_sym)
|
305
|
+
end
|
306
|
+
elsif var.is_a?(Array)
|
307
|
+
result = []
|
308
|
+
var.each do |v|
|
309
|
+
result << var_to_sym(v, str_to_sym)
|
310
|
+
end
|
311
|
+
elsif var.is_a?(String)
|
312
|
+
result = str_to_sym ? var.to_sym : var
|
313
|
+
else
|
314
|
+
result = var
|
315
|
+
end
|
316
|
+
result
|
317
|
+
end
|
280
318
|
end
|
data/lib/tiny_grabber/debug.rb
CHANGED
@@ -1,14 +1,74 @@
|
|
1
1
|
# Save debug log information
|
2
2
|
class Debug
|
3
|
+
# Flag that activates saving of debug log output
|
4
|
+
attr_accessor :active
|
5
|
+
# Print log or save to file
|
6
|
+
attr_accessor :destination
|
7
|
+
# Flag to save response HTML to file
|
8
|
+
attr_accessor :save_html
|
3
9
|
|
4
|
-
#
|
10
|
+
# Initialize a debug object
|
11
|
+
#
|
12
|
+
def initialize
|
13
|
+
@active = false
|
14
|
+
@destination = :print
|
15
|
+
@save_html = false
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
# Set debug active flag
|
20
|
+
#
|
21
|
+
# @param active Flag
|
22
|
+
#
|
23
|
+
def active= active
|
24
|
+
@active = active
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
# Get debug active flag
|
29
|
+
def active
|
30
|
+
@active
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
# Set debug destination
|
5
35
|
#
|
6
36
|
# @param destination Save log to file or print
|
37
|
+
#
|
38
|
+
def destination= destination
|
39
|
+
@destination = destination
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
# Get debug destination
|
44
|
+
#
|
45
|
+
def destination
|
46
|
+
@destination
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
# Set debug flag to save response HTML to file
|
51
|
+
#
|
52
|
+
# @param save_html Flag
|
53
|
+
def save_html= save_html
|
54
|
+
@save_html = save_html
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
# Get debug flag to save response HTML to file
|
59
|
+
#
|
60
|
+
def save_html
|
61
|
+
@save_html
|
62
|
+
end
|
63
|
+
|
64
|
+
|
65
|
+
# Save log information
|
66
|
+
#
|
7
67
|
# @param message Message body
|
8
68
|
#
|
9
|
-
def
|
69
|
+
def save message
|
10
70
|
message = "TG | #{Time.now.strftime('%Y%m%d-%H%M%S')} | #{message}"
|
11
|
-
case destination
|
71
|
+
case @destination
|
12
72
|
when :file
|
13
73
|
save_to_file message
|
14
74
|
when :print
|
@@ -21,7 +81,7 @@ class Debug
|
|
21
81
|
#
|
22
82
|
# @param message Message body
|
23
83
|
#
|
24
|
-
def
|
84
|
+
def save_to_file message
|
25
85
|
debug_path = "#{Dir.pwd}/log"
|
26
86
|
Dir.mkdir(debug_path, 0775) unless File.exists? debug_path
|
27
87
|
filename = "#{Time.now.strftime('%Y%m%d')}.log"
|
data/lib/tiny_grabber/version.rb
CHANGED
data/lib/tiny_grabber.rb
CHANGED
@@ -80,15 +80,7 @@ class TinyGrabber
|
|
80
80
|
# @param debug Flag to start debug
|
81
81
|
#
|
82
82
|
def debug= debug
|
83
|
-
|
84
|
-
@agent.debug = debug
|
85
|
-
@agent.debug_destination = :print
|
86
|
-
@agent.debug_save_html = false
|
87
|
-
elsif debug.is_a? Hash
|
88
|
-
@agent.debug = debug[:active]
|
89
|
-
@agent.debug_destination = debug[:destination]
|
90
|
-
@agent.debug_save_html = debug[:save_html]
|
91
|
-
end
|
83
|
+
@agent.debug = debug
|
92
84
|
end
|
93
85
|
|
94
86
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny_grabber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksandr Chernyshev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: socksify
|