tiny_grabber 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -5
- data/lib/tiny_grabber.rb +10 -8
- data/lib/tiny_grabber/agent.rb +45 -11
- data/lib/tiny_grabber/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c48375a5210123b907c6ebe2a7911f1c7090188a
|
4
|
+
data.tar.gz: 19b5052e7e79e876a40368d9e061a9aba032b7b4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 498cffe418cabb9917dad706d8df5d6281fcc119418016dcd775169346fd5108fcdbf0af5c8d75cbb1472acfdef96d4bc82e037cfc339de729fd864f1dcdafc2
|
7
|
+
data.tar.gz: 582d3bf2fa5cde9a54c2862d5f5d237218d483f69dadd8749eba93a07ab58a17ecfd1d40aaac5197823a155363bd51ae1196f5de033c33239432cd7b76d281b6
|
data/README.md
CHANGED
@@ -28,7 +28,7 @@ Or install it yourself as:
|
|
28
28
|
require 'tiny_grabber'
|
29
29
|
|
30
30
|
read_timeout = 300
|
31
|
-
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
31
|
+
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36'
|
32
32
|
proxy = { ip: 'xx.xx.xx.xx', port: 'xxxx' }
|
33
33
|
headers1 = { 'Content-Type' => 'text/html; charset=utf-8' }
|
34
34
|
headers2 = { 'Content-Type' => 'text/html; charset=utf-8', 'Connection' => 'keep-alive' }
|
@@ -54,13 +54,16 @@ tg.cookies = cookies
|
|
54
54
|
|
55
55
|
# Make a request with the GET method
|
56
56
|
response = tg.get 'https://whoer.net/ru', headers
|
57
|
+
# Reset headers and cookies
|
58
|
+
tg.reset
|
57
59
|
# Make a request with the POST method
|
58
|
-
response = tg.
|
60
|
+
response = tg.post 'https://whoer.net/ru', params, headers
|
61
|
+
|
59
62
|
|
60
63
|
# Make a singleton request with the GET method
|
61
|
-
response = TinyGrabber.get 'https://whoer.net/ru',
|
64
|
+
response = TinyGrabber.get 'https://whoer.net/ru', { debug = true, read_timeout = read_timeout ... }
|
62
65
|
# Make a singleton request with the POST method
|
63
|
-
response = TinyGrabber.post 'https://whoer.net/ru', params,
|
66
|
+
response = TinyGrabber.post 'https://whoer.net/ru', params, { debug = true, read_timeout = read_timeout ... }
|
64
67
|
|
65
68
|
# Get Nokogiri object from response HTML
|
66
69
|
ng = response.ng
|
@@ -70,11 +73,18 @@ response.code
|
|
70
73
|
response.cookies
|
71
74
|
# Get response headers
|
72
75
|
response.headers
|
73
|
-
|
76
|
+
# Get response HTML
|
77
|
+
response.body
|
74
78
|
```
|
75
79
|
|
76
80
|
## Changelog
|
77
81
|
|
82
|
+
* *v 0.2.1*
|
83
|
+
* Set a random user_agent from the list if it is not set
|
84
|
+
* Remove headers attribute from singleton methods
|
85
|
+
* Remove the transfer-encoding header for chained requests
|
86
|
+
* Add reset method to delete headers and cookies
|
87
|
+
|
78
88
|
* *v 0.2.0*
|
79
89
|
* Now there is an opportunity to create object TinyGrabber
|
80
90
|
* Change order of parameters for singleton request
|
data/lib/tiny_grabber.rb
CHANGED
@@ -47,9 +47,9 @@ class TinyGrabber
|
|
47
47
|
# @param url Resource link
|
48
48
|
# @param config Configuration hash passed to initialize
|
49
49
|
#
|
50
|
-
def self.get url,
|
50
|
+
def self.get url, config = {}
|
51
51
|
initialize config
|
52
|
-
@agent.fetch url, :get
|
52
|
+
@agent.fetch url, :get
|
53
53
|
end
|
54
54
|
|
55
55
|
|
@@ -59,7 +59,7 @@ class TinyGrabber
|
|
59
59
|
# @param params Request post data
|
60
60
|
# @param headers Request header
|
61
61
|
#
|
62
|
-
def post url, params = {}, headers = {}
|
62
|
+
def post url, params = {}, headers = {}
|
63
63
|
@agent.fetch url, :post, headers, params
|
64
64
|
end
|
65
65
|
|
@@ -69,9 +69,9 @@ class TinyGrabber
|
|
69
69
|
# @param url Resource link
|
70
70
|
# @param config Configuration hash passed to initialize
|
71
71
|
#
|
72
|
-
def self.post url, params = {},
|
72
|
+
def self.post url, params = {}, config = {}
|
73
73
|
initialize config
|
74
|
-
@agent.fetch url, :post,
|
74
|
+
@agent.fetch url, :post, {}, params
|
75
75
|
end
|
76
76
|
|
77
77
|
|
@@ -173,7 +173,9 @@ class TinyGrabber
|
|
173
173
|
@agent.cookies = cookies
|
174
174
|
end
|
175
175
|
|
176
|
-
|
177
|
-
|
178
|
-
|
176
|
+
# Call RESET agent method
|
177
|
+
#
|
178
|
+
def reset
|
179
|
+
@agent.reset
|
180
|
+
end
|
179
181
|
end
|
data/lib/tiny_grabber/agent.rb
CHANGED
@@ -18,13 +18,37 @@ class TinyGrabber::Agent
|
|
18
18
|
# Cookies
|
19
19
|
attr_accessor :cookies
|
20
20
|
|
21
|
+
# Agent aliases taken from http://www.useragentstring.com/pages/Chrome/
|
22
|
+
AGENT_ALIASES = [
|
23
|
+
# Chrome
|
24
|
+
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
|
25
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
|
26
|
+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
|
27
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
|
28
|
+
# Firefox
|
29
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
|
30
|
+
'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
|
31
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
|
32
|
+
'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
|
33
|
+
# Internet Explorer
|
34
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
|
35
|
+
'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
|
36
|
+
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
|
37
|
+
'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
|
38
|
+
# Opera
|
39
|
+
'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
|
40
|
+
'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
|
41
|
+
'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
|
42
|
+
'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
|
43
|
+
]
|
44
|
+
|
21
45
|
# Initialization object
|
22
46
|
#
|
23
47
|
def initialize
|
24
48
|
@debug = false
|
25
49
|
|
26
50
|
# Initialize variables agent attributes
|
27
|
-
@user_agent =
|
51
|
+
@user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
|
28
52
|
@proxy = []
|
29
53
|
@basic_auth = {}
|
30
54
|
@headers = {}
|
@@ -126,7 +150,7 @@ class TinyGrabber::Agent
|
|
126
150
|
if @debug
|
127
151
|
p "#{debug_initial_word} =============================="
|
128
152
|
p "#{debug_initial_word} #{method.upcase} #{url}"
|
129
|
-
p "#{debug_initial_word} #{params}"
|
153
|
+
p "#{debug_initial_word} -> [params] = #{params}"
|
130
154
|
p "#{debug_initial_word} ------------------------------"
|
131
155
|
end
|
132
156
|
set_uri url
|
@@ -160,7 +184,7 @@ class TinyGrabber::Agent
|
|
160
184
|
if @debug
|
161
185
|
debug_filename = "log/#{method.upcase}_#{@uri.to_s.gsub(/[\/:]/, '_').gsub(/_+/, '_')}"
|
162
186
|
File.open(debug_filename, 'wb') { |f| f << @response.body } if @debug
|
163
|
-
p "#{debug_initial_word}
|
187
|
+
p "#{debug_initial_word} <- [html_file] = #{debug_filename}"
|
164
188
|
end
|
165
189
|
@response
|
166
190
|
end
|
@@ -173,7 +197,7 @@ class TinyGrabber::Agent
|
|
173
197
|
def set_uri url
|
174
198
|
# Unescape first, then escape, so an already-escaped url is not escaped twice
|
175
199
|
@uri = URI(URI.escape(URI.unescape(url)))
|
176
|
-
p "#{debug_initial_word}
|
200
|
+
p "#{debug_initial_word} -> [uri] = #{@uri}" if @debug
|
177
201
|
end
|
178
202
|
|
179
203
|
|
@@ -181,7 +205,7 @@ class TinyGrabber::Agent
|
|
181
205
|
#
|
182
206
|
def set_user_agent
|
183
207
|
@headers['User-Agent'] = @user_agent
|
184
|
-
p "#{debug_initial_word} user_agent = #{@user_agent}" if @debug
|
208
|
+
p "#{debug_initial_word} -> [user_agent] = #{@user_agent}" if @debug
|
185
209
|
end
|
186
210
|
|
187
211
|
|
@@ -189,7 +213,7 @@ class TinyGrabber::Agent
|
|
189
213
|
#
|
190
214
|
def set_basic_auth
|
191
215
|
@request.basic_auth @basic_auth[:username], @basic_auth[:password]
|
192
|
-
p "#{debug_initial_word} basic_auth = #{@basic_auth}" if @debug
|
216
|
+
p "#{debug_initial_word} -> [basic_auth] = #{@basic_auth}" if @debug
|
193
217
|
end
|
194
218
|
|
195
219
|
|
@@ -197,7 +221,7 @@ class TinyGrabber::Agent
|
|
197
221
|
#
|
198
222
|
def set_headers
|
199
223
|
@headers.each { |k, v| @request.add_field(String(k), v) }
|
200
|
-
p "#{debug_initial_word} headers = #{@headers}" if @debug
|
224
|
+
p "#{debug_initial_word} -> [headers] = #{@headers}" if @debug
|
201
225
|
end
|
202
226
|
|
203
227
|
|
@@ -205,7 +229,7 @@ class TinyGrabber::Agent
|
|
205
229
|
#
|
206
230
|
def set_cookies
|
207
231
|
@request['Cookie'] = @cookies
|
208
|
-
p "#{debug_initial_word} cookies = #{@cookies}" if @debug
|
232
|
+
p "#{debug_initial_word} -> [cookies] = #{@cookies}" if @debug
|
209
233
|
end
|
210
234
|
|
211
235
|
|
@@ -215,7 +239,7 @@ class TinyGrabber::Agent
|
|
215
239
|
def send_request
|
216
240
|
@http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https') do |http|
|
217
241
|
http.read_timeout = @read_timeout
|
218
|
-
p "#{debug_initial_word} read_timeout = #{@read_timeout}" if @debug
|
242
|
+
p "#{debug_initial_word} -> [read_timeout] = #{@read_timeout}" if @debug
|
219
243
|
http.request(@request)
|
220
244
|
end
|
221
245
|
end
|
@@ -225,7 +249,9 @@ class TinyGrabber::Agent
|
|
225
249
|
#
|
226
250
|
def save_headers
|
227
251
|
@headers = @response.headers
|
228
|
-
|
252
|
+
# Drop the transfer-encoding response header so it is not re-sent on chained requests
|
253
|
+
@headers.delete('transfer-encoding')
|
254
|
+
p "#{debug_initial_word} <- [headers] = #{@headers}" if @debug
|
229
255
|
end
|
230
256
|
|
231
257
|
|
@@ -233,7 +259,15 @@ class TinyGrabber::Agent
|
|
233
259
|
#
|
234
260
|
def save_cookies
|
235
261
|
@cookies = @response.cookies
|
236
|
-
p "#{debug_initial_word}
|
262
|
+
p "#{debug_initial_word} <- [cookies] = #{@cookies}" if @debug
|
263
|
+
end
|
264
|
+
|
265
|
+
|
266
|
+
# Clears headers and cookies
|
267
|
+
#
|
268
|
+
def reset
|
269
|
+
@headers = {}
|
270
|
+
@cookies = nil
|
237
271
|
end
|
238
272
|
|
239
273
|
|
data/lib/tiny_grabber/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny_grabber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksandr Chernyshev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: socksify
|