tiny_grabber 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -10
- data/lib/tiny_grabber/agent.rb +69 -31
- data/lib/tiny_grabber/debug.rb +64 -4
- data/lib/tiny_grabber/version.rb +1 -1
- data/lib/tiny_grabber.rb +1 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8385d450b76a0d89ffa5586ee9f9a684f56bce00
|
4
|
+
data.tar.gz: a6691e3ad022cf3ad32d1924ce947b28786d8bda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72639e18d054c04ac01549d4cc044395b44dff3267ea51408d77488ec56b9130ea104d796b07c73252ed2b40bbbfa1222ac6fe64a8e90e3f728f6f65ee33fef6
|
7
|
+
data.tar.gz: 967877a786a60e4204cdf8066e7ab27a997449f7ac8a9fa73e74ce2ca50cecc820df586e70205e548747305d7f05476936d6149466544b28165660591270f969
|
data/README.md
CHANGED
@@ -32,31 +32,39 @@ require 'tiny_grabber'
|
|
32
32
|
|
33
33
|
# Set request read timeout
|
34
34
|
read_timeout = 300
|
35
|
+
|
35
36
|
# You can set your own User-Agent; by default a random User-Agent is picked from a list of the most popular ones
|
36
37
|
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36'
|
38
|
+
|
37
39
|
# Set a proxy to conceal your real IP
|
38
|
-
|
40
|
+
# ip (required argument) - String in the format [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+
|
41
|
+
# port(required argument) - Integer
|
42
|
+
# type - Connection type: `http` or `socks` (any value other than `socks` uses an HTTP proxy)
|
43
|
+
proxy = { ip: 'xx.xx.xx.xx', port: 'xxxx', type: '...' }
|
44
|
+
|
39
45
|
# Set Net::HTTP headers
|
40
46
|
headers = { 'Content-Type' => 'text/html; charset=utf-8' }
|
41
|
-
|
47
|
+
|
48
|
+
# You can set your own cookies as a String or a Hash
|
42
49
|
cookies = 'username=username&password=password'
|
43
|
-
# or Hash
|
44
50
|
cookies = { username: 'username', password: 'password' }
|
51
|
+
|
45
52
|
# For POST requests you can set the data to send
|
46
53
|
params = { key: 'value' }
|
47
54
|
|
48
55
|
# Initialize TinyGrabber object
|
49
56
|
tg = TinyGrabber.new
|
50
57
|
|
51
|
-
# Set debug flag for view log information
|
52
|
-
tg.debug = true
|
53
58
|
|
54
|
-
#
|
55
|
-
# active -
|
56
|
-
# destination -
|
57
|
-
# save_html -
|
59
|
+
# Set debug configuration
|
60
|
+
# active - Flag to save log information
|
61
|
+
# destination - Save log to file or print: [:file, :print]
|
62
|
+
# save_html - Flag to save response html to file
|
58
63
|
tg.debug = { active: true, destination: :file, save_html: true }
|
59
64
|
|
65
|
+
# Set the debug flag to true to activate debugging with the default configuration { active: true, destination: :print, save_html: false }
|
66
|
+
tg.debug = true
|
67
|
+
|
60
68
|
# Set max time to execute request
|
61
69
|
tg.read_timeout = read_timeout
|
62
70
|
|
@@ -114,8 +122,11 @@ response.body
|
|
114
122
|
|
115
123
|
## Changelog
|
116
124
|
|
125
|
+
* *v 0.2.5*
|
126
|
+
* Added automatic conversion of parameter keys to symbols
|
127
|
+
Now you can set cookies with a string-keyed Hash: `cookies = { "username" => 'username', "password" => 'password' }`
|
117
128
|
* *v 0.2.4*
|
118
|
-
*
|
129
|
+
* Added debug file
|
119
130
|
* *v 0.2.3*
|
120
131
|
* Added the ability to set cookies as a Hash
|
121
132
|
* *v 0.2.2*
|
data/lib/tiny_grabber/agent.rb
CHANGED
@@ -3,12 +3,8 @@
|
|
3
3
|
# Setting connect attributes
|
4
4
|
#
|
5
5
|
class TinyGrabber::Agent
|
6
|
-
# Debug
|
6
|
+
# Debug configuration
|
7
7
|
attr_accessor :debug
|
8
|
-
# Debug destination type
|
9
|
-
attr_accessor :debug_destination
|
10
|
-
# Debug flag for save html in file
|
11
|
-
attr_accessor :debug_save_html
|
12
8
|
# Max time to execute request
|
13
9
|
attr_accessor :read_timeout
|
14
10
|
# Web browser name
|
@@ -49,9 +45,7 @@ class TinyGrabber::Agent
|
|
49
45
|
# Initialization object
|
50
46
|
#
|
51
47
|
def initialize
|
52
|
-
@debug =
|
53
|
-
@debug_destination = :file
|
54
|
-
@debug_save_html = false
|
48
|
+
@debug = Debug.new
|
55
49
|
|
56
50
|
# Initialize variables agent attributes
|
57
51
|
@user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
|
@@ -69,6 +63,22 @@ class TinyGrabber::Agent
|
|
69
63
|
end
|
70
64
|
|
71
65
|
|
66
|
+
# Set debug configuration
|
67
|
+
#
|
68
|
+
# @param debug
|
69
|
+
#
|
70
|
+
def debug= debug
|
71
|
+
debug = var_to_sym(debug, true)
|
72
|
+
if debug.is_a?(Hash)
|
73
|
+
@debug.active = debug[:active]
|
74
|
+
@debug.destination = debug[:destination]
|
75
|
+
@debug.save_html = debug[:save_html]
|
76
|
+
elsif debug.is_a?(TrueClass)
|
77
|
+
@debug.active = true
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
|
72
82
|
# Set READ_TIMEOUT agent attribute
|
73
83
|
#
|
74
84
|
# @param read_timeout Waiting time to reading
|
@@ -96,16 +106,18 @@ class TinyGrabber::Agent
|
|
96
106
|
#
|
97
107
|
def proxy= proxy
|
98
108
|
if proxy.is_a?(String)
|
99
|
-
ip, port = proxy.split(':')
|
109
|
+
ip, port, type = proxy.split(':')
|
100
110
|
fail 'attribute proxy must be in format ip:port' unless ip and port
|
101
|
-
|
111
|
+
type ||= :http
|
112
|
+
proxy = { ip: ip, port: port, type: type }
|
102
113
|
end
|
114
|
+
proxy = var_to_sym(proxy)
|
103
115
|
fail 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
|
104
116
|
fail 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] and proxy[:port]
|
105
117
|
|
106
118
|
@proxy = proxy
|
107
|
-
if [
|
108
|
-
@http = Net::HTTP.SOCKSProxy(proxy[:ip], proxy[:port])
|
119
|
+
if [:socks, 'socks'].include? proxy[:type]
|
120
|
+
@http = Net::HTTP.SOCKSProxy(proxy[:ip].to_s, proxy[:port].to_s)
|
109
121
|
else
|
110
122
|
@http = Net::HTTP::Proxy(proxy[:ip], proxy[:port])
|
111
123
|
end
|
@@ -117,6 +129,7 @@ class TinyGrabber::Agent
|
|
117
129
|
# @param basic_auth Authentication configuration
|
118
130
|
#
|
119
131
|
def basic_auth= basic_auth
|
132
|
+
basic_auth = var_to_sym(basic_auth)
|
120
133
|
fail 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
|
121
134
|
fail 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] and basic_auth[:password]
|
122
135
|
@basic_auth = basic_auth
|
@@ -138,6 +151,7 @@ class TinyGrabber::Agent
|
|
138
151
|
# @param cookies Request cookies
|
139
152
|
#
|
140
153
|
def cookies= cookies
|
154
|
+
cookies = var_to_sym(cookies)
|
141
155
|
cookies = cookies.to_a.map { |x| "#{x[0]}=#{x[1]}" }.join('&') if cookies.is_a?(Hash)
|
142
156
|
fail 'attribute cookies must be String' unless cookies.is_a?(String)
|
143
157
|
@cookies = cookies
|
@@ -154,11 +168,12 @@ class TinyGrabber::Agent
|
|
154
168
|
# @param params Request additional params
|
155
169
|
#
|
156
170
|
def fetch url, method = :get, headers = {}, params = {}
|
157
|
-
if @debug
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
171
|
+
if @debug.active
|
172
|
+
@debug.save '=============================='
|
173
|
+
@debug.save "#{method.upcase} #{url}"
|
174
|
+
@debug.save "-> [proxy] = #{@proxy}" if @proxy
|
175
|
+
@debug.save "-> [params] = #{params}"
|
176
|
+
@debug.save '------------------------------'
|
162
177
|
end
|
163
178
|
set_uri url
|
164
179
|
case method
|
@@ -177,23 +192,23 @@ class TinyGrabber::Agent
|
|
177
192
|
case @response
|
178
193
|
# HTTP response code 1xx
|
179
194
|
when Net::HTTPInformation
|
180
|
-
|
195
|
+
@debug.save "<- [response] = Net::HTTPInformation" if @debug.active
|
181
196
|
# HTTP response code 2xx
|
182
197
|
when Net::HTTPSuccess
|
183
198
|
save_headers if @response.header
|
184
199
|
save_cookies if @response.cookies
|
185
|
-
|
200
|
+
@debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
|
186
201
|
# HTTP response code 3xx
|
187
202
|
when Net::HTTPRedirection
|
188
|
-
|
203
|
+
@debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
|
189
204
|
# HTTP response code 4xx
|
190
205
|
when Net::HTTPClientError
|
191
|
-
|
206
|
+
@debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
|
192
207
|
# HTTP response code 5xx
|
193
208
|
when Net::HTTPServerError
|
194
|
-
|
209
|
+
@debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
|
195
210
|
end
|
196
|
-
|
211
|
+
@debug.save_to_file @response.body if @debug.save_html
|
197
212
|
@response
|
198
213
|
end
|
199
214
|
|
@@ -205,7 +220,7 @@ class TinyGrabber::Agent
|
|
205
220
|
def set_uri url
|
206
221
|
# Unescape first, then escape again, so that already-escaped URLs are not escaped twice
|
207
222
|
@uri = URI(URI.escape(URI.unescape(url)))
|
208
|
-
|
223
|
+
@debug.save "-> [uri] = #{@uri}" if @debug.active
|
209
224
|
end
|
210
225
|
|
211
226
|
|
@@ -213,7 +228,7 @@ class TinyGrabber::Agent
|
|
213
228
|
#
|
214
229
|
def set_user_agent
|
215
230
|
@headers['User-Agent'] = @user_agent
|
216
|
-
|
231
|
+
@debug.save "-> [user_agent] = #{@user_agent}" if @debug.active
|
217
232
|
end
|
218
233
|
|
219
234
|
|
@@ -221,7 +236,7 @@ class TinyGrabber::Agent
|
|
221
236
|
#
|
222
237
|
def set_basic_auth
|
223
238
|
@request.basic_auth @basic_auth[:username], @basic_auth[:password]
|
224
|
-
|
239
|
+
@debug.save "-> [basic_auth] = #{@basic_auth}" if @debug.active
|
225
240
|
end
|
226
241
|
|
227
242
|
|
@@ -229,7 +244,7 @@ class TinyGrabber::Agent
|
|
229
244
|
#
|
230
245
|
def set_headers
|
231
246
|
@headers.each { |k, v| @request.add_field(String(k), v) }
|
232
|
-
|
247
|
+
@debug.save "-> [headers] = #{@headers}" if @debug.active
|
233
248
|
end
|
234
249
|
|
235
250
|
|
@@ -237,7 +252,7 @@ class TinyGrabber::Agent
|
|
237
252
|
#
|
238
253
|
def set_cookies
|
239
254
|
@request['Cookie'] = @cookies
|
240
|
-
|
255
|
+
@debug.save "-> [cookies] = #{@cookies}" if @debug.active
|
241
256
|
end
|
242
257
|
|
243
258
|
|
@@ -247,7 +262,7 @@ class TinyGrabber::Agent
|
|
247
262
|
def send_request
|
248
263
|
@http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https') do |http|
|
249
264
|
http.read_timeout = @read_timeout
|
250
|
-
|
265
|
+
@debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
|
251
266
|
http.request(@request)
|
252
267
|
end
|
253
268
|
end
|
@@ -259,7 +274,7 @@ class TinyGrabber::Agent
|
|
259
274
|
@headers = @response.headers
|
260
275
|
# Delete header TRANSFER_ENCODING for chain of requests
|
261
276
|
@headers.delete('transfer-encoding')
|
262
|
-
|
277
|
+
@debug.save "<- [headers] = #{@headers}" if @debug.active
|
263
278
|
end
|
264
279
|
|
265
280
|
|
@@ -267,7 +282,7 @@ class TinyGrabber::Agent
|
|
267
282
|
#
|
268
283
|
def save_cookies
|
269
284
|
@cookies = @response.cookies
|
270
|
-
|
285
|
+
@debug.save "<- [cookies] = #{@cookies}" if @debug.active
|
271
286
|
end
|
272
287
|
|
273
288
|
|
@@ -277,4 +292,27 @@ class TinyGrabber::Agent
|
|
277
292
|
@headers = {}
|
278
293
|
@cookies = nil
|
279
294
|
end
|
295
|
+
|
296
|
+
# Recursively convert Hash keys (and, optionally, String values) to symbols
|
297
|
+
#
|
298
|
+
# @param var Variable to convert
|
299
|
+
#
|
300
|
+
def var_to_sym var, str_to_sym = false
|
301
|
+
if var.is_a?(Hash)
|
302
|
+
result = {}
|
303
|
+
var.each do |k, v|
|
304
|
+
result[k.to_sym] = var_to_sym(v, str_to_sym)
|
305
|
+
end
|
306
|
+
elsif var.is_a?(Array)
|
307
|
+
result = []
|
308
|
+
var.each do |v|
|
309
|
+
result << var_to_sym(v, str_to_sym)
|
310
|
+
end
|
311
|
+
elsif var.is_a?(String)
|
312
|
+
result = str_to_sym ? var.to_sym : var
|
313
|
+
else
|
314
|
+
result = var
|
315
|
+
end
|
316
|
+
result
|
317
|
+
end
|
280
318
|
end
|
data/lib/tiny_grabber/debug.rb
CHANGED
@@ -1,14 +1,74 @@
|
|
1
1
|
# Save debug log information
|
2
2
|
class Debug
|
3
|
+
# Flag to activate saving of debug log information
|
4
|
+
attr_accessor :active
|
5
|
+
# Print log or save to file
|
6
|
+
attr_accessor :destination
|
7
|
+
# Flag to save response HTML to file
|
8
|
+
attr_accessor :save_html
|
3
9
|
|
4
|
-
#
|
10
|
+
# Initialize a debug object
|
11
|
+
#
|
12
|
+
def initialize
|
13
|
+
@active = false
|
14
|
+
@destination = :print
|
15
|
+
@save_html = false
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
# Set debug active flag
|
20
|
+
#
|
21
|
+
# @param active Flag
|
22
|
+
#
|
23
|
+
def active= active
|
24
|
+
@active = active
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
# Get debug active flag
|
29
|
+
def active
|
30
|
+
@active
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
# Set debug destination
|
5
35
|
#
|
6
36
|
# @param destination Save log to file or print
|
37
|
+
#
|
38
|
+
def destination= destination
|
39
|
+
@destination = destination
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
# Get debug destination
|
44
|
+
#
|
45
|
+
def destination
|
46
|
+
@destination
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
# Set debug flag to save response HTML to file
|
51
|
+
#
|
52
|
+
# @param save_html Flag
|
53
|
+
def save_html= save_html
|
54
|
+
@save_html = save_html
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
# Get debug flag to save response HTML to file
|
59
|
+
#
|
60
|
+
def save_html
|
61
|
+
@save_html
|
62
|
+
end
|
63
|
+
|
64
|
+
|
65
|
+
# Save log information
|
66
|
+
#
|
7
67
|
# @param message Message body
|
8
68
|
#
|
9
|
-
def
|
69
|
+
def save message
|
10
70
|
message = "TG | #{Time.now.strftime('%Y%m%d-%H%M%S')} | #{message}"
|
11
|
-
case destination
|
71
|
+
case @destination
|
12
72
|
when :file
|
13
73
|
save_to_file message
|
14
74
|
when :print
|
@@ -21,7 +81,7 @@ class Debug
|
|
21
81
|
#
|
22
82
|
# @param message Message body
|
23
83
|
#
|
24
|
-
def
|
84
|
+
def save_to_file message
|
25
85
|
debug_path = "#{Dir.pwd}/log"
|
26
86
|
Dir.mkdir(debug_path, 0775) unless File.exists? debug_path
|
27
87
|
filename = "#{Time.now.strftime('%Y%m%d')}.log"
|
data/lib/tiny_grabber/version.rb
CHANGED
data/lib/tiny_grabber.rb
CHANGED
@@ -80,15 +80,7 @@ class TinyGrabber
|
|
80
80
|
# @param debug Debug flag (true) or debug configuration Hash
|
81
81
|
#
|
82
82
|
def debug= debug
|
83
|
-
|
84
|
-
@agent.debug = debug
|
85
|
-
@agent.debug_destination = :print
|
86
|
-
@agent.debug_save_html = false
|
87
|
-
elsif debug.is_a? Hash
|
88
|
-
@agent.debug = debug[:active]
|
89
|
-
@agent.debug_destination = debug[:destination]
|
90
|
-
@agent.debug_save_html = debug[:save_html]
|
91
|
-
end
|
83
|
+
@agent.debug = debug
|
92
84
|
end
|
93
85
|
|
94
86
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny_grabber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksandr Chernyshev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: socksify
|