tiny_grabber 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99211b6e18440ea58661cf5b5a5bdadec631f942
4
- data.tar.gz: 1e78bd2089d65949431b519ae7a15b3f4bba202f
3
+ metadata.gz: c48375a5210123b907c6ebe2a7911f1c7090188a
4
+ data.tar.gz: 19b5052e7e79e876a40368d9e061a9aba032b7b4
5
5
  SHA512:
6
- metadata.gz: 3a4dc8a114ea9818dee2bd400ccc5e45c60ae40f2a9e7ce5002c51b214b9e471d1066f2d865e48ab0acbe91e80c51d55cb17ed43a7eed29e01283f82c61c09b8
7
- data.tar.gz: 4ec0f357261f447f3cd34dda16a9376dd770a73a9c7fc30b22577f42de1b4a33bd557b5c8da85ce54a5d40d780af4e152ebbd93b0d50097fbf0c7081d134f30e
6
+ metadata.gz: 498cffe418cabb9917dad706d8df5d6281fcc119418016dcd775169346fd5108fcdbf0af5c8d75cbb1472acfdef96d4bc82e037cfc339de729fd864f1dcdafc2
7
+ data.tar.gz: 582d3bf2fa5cde9a54c2862d5f5d237218d483f69dadd8749eba93a07ab58a17ecfd1d40aaac5197823a155363bd51ae1196f5de033c33239432cd7b76d281b6
data/README.md CHANGED
@@ -28,7 +28,7 @@ Or install it yourself as:
28
28
  require 'tiny_grabber'
29
29
 
30
30
  read_timeout = 300
31
- user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36'
31
+ user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36'
32
32
  proxy = { ip: 'xx.xx.xx.xx', port: 'xxxx' }
33
33
  headers1 = { 'Content-Type' => 'text/html; charset=utf-8' }
34
34
  headers2 = { 'Content-Type' => 'text/html; charset=utf-8', 'Connection' => 'keep-alive' }
@@ -54,13 +54,16 @@ tg.cookies = cookies
54
54
 
55
55
  # Make response with GET method
56
56
  response = tg.get 'https://whoer.net/ru', headers
57
+ # Reset headers and cookies
58
+ tg.reset
57
59
  # Make response with POST method
58
- response = tg.get 'https://whoer.net/ru', params, headers
60
+ response = tg.post 'https://whoer.net/ru', params, headers
61
+
59
62
 
60
63
  # Make singleton response with GET method
61
- response = TinyGrabber.get 'https://whoer.net/ru', headers, { debug = true, read_timeout = read_timeout ...}
64
+ response = TinyGrabber.get 'https://whoer.net/ru', { debug = true, read_timeout = read_timeout ... }
62
65
  # Make singleton response with POST method
63
- response = TinyGrabber.post 'https://whoer.net/ru', params, headers, { debug = true, read_timeout = read_timeout ...}
66
+ response = TinyGrabber.post 'https://whoer.net/ru', params, { debug = true, read_timeout = read_timeout ... }
64
67
 
65
68
  # Get Nokogiri object from response HTML
66
69
  ng = response.ng
@@ -70,11 +73,18 @@ response.code
70
73
  response.cookies
71
74
  # Get response headers
72
75
  response.headers
73
-
76
+ # Get response HTML
77
+ response.body
74
78
  ```
75
79
 
76
80
  ## Changelog
77
81
 
82
+ * *v 0.2.1*
83
+ * Set a random user_agent from the built-in list if none is set
84
+ * Remove headers attribute from singleton methods
85
+ * Remove the transfer-encoding header for chained requests
86
+ * Add reset method to delete headers and cookies
87
+
78
88
  * *v 0.2.0*
79
89
  * Now there is an opportunity to create object TinyGrabber
80
90
  * Change order of parameters for singleton request
data/lib/tiny_grabber.rb CHANGED
@@ -47,9 +47,9 @@ class TinyGrabber
47
47
  # @param url Resource link
48
48
  # @param headers Request header
49
49
  #
50
- def self.get url, headers = {}, config = {}
50
+ def self.get url, config = {}
51
51
  initialize config
52
- @agent.fetch url, :get, headers
52
+ @agent.fetch url, :get
53
53
  end
54
54
 
55
55
 
@@ -59,7 +59,7 @@ class TinyGrabber
59
59
  # @param params Request post data
60
60
  # @param headers Request header
61
61
  #
62
- def post url, params = {}, headers = {}, config = {}
62
+ def post url, params = {}, headers = {}
63
63
  @agent.fetch url, :post, headers, params
64
64
  end
65
65
 
@@ -69,9 +69,9 @@ class TinyGrabber
69
69
  # @param url Resource link
70
70
  # @param headers Request header
71
71
  #
72
- def self.post url, params = {}, headers = {}, config = {}
72
+ def self.post url, params = {}, config = {}
73
73
  initialize config
74
- @agent.fetch url, :post, headers, params
74
+ @agent.fetch url, :post, {}, params
75
75
  end
76
76
 
77
77
 
@@ -173,7 +173,9 @@ class TinyGrabber
173
173
  @agent.cookies = cookies
174
174
  end
175
175
 
176
-
177
-
178
-
176
+ # Call RESET agent method
177
+ #
178
+ def reset
179
+ @agent.reset
180
+ end
179
181
  end
@@ -18,13 +18,37 @@ class TinyGrabber::Agent
18
18
  # Headers
19
19
  attr_accessor :cookies
20
20
 
21
+ # Agent aliases given from http://www.useragentstring.com/pages/Chrome/
22
+ AGENT_ALIASES = [
23
+ # Chrome
24
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
25
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
26
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
27
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
28
+ # Firefox
29
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
30
+ 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
31
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
32
+ 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
33
+ # Internet Explorer
34
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
35
+ 'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
36
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
37
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
38
+ # Opera
39
+ 'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
40
+ 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
41
+ 'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
42
+ 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
43
+ ]
44
+
21
45
  # Initialization object
22
46
  #
23
47
  def initialize
24
48
  @debug = false
25
49
 
26
50
  # Initialize variables agent attributes
27
- @user_agent = nil
51
+ @user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
28
52
  @proxy = []
29
53
  @basic_auth = {}
30
54
  @headers = {}
@@ -126,7 +150,7 @@ class TinyGrabber::Agent
126
150
  if @debug
127
151
  p "#{debug_initial_word} =============================="
128
152
  p "#{debug_initial_word} #{method.upcase} #{url}"
129
- p "#{debug_initial_word} #{params}"
153
+ p "#{debug_initial_word} -> [params] = #{params}"
130
154
  p "#{debug_initial_word} ------------------------------"
131
155
  end
132
156
  set_uri url
@@ -160,7 +184,7 @@ class TinyGrabber::Agent
160
184
  if @debug
161
185
  debug_filename = "log/#{method.upcase}_#{@uri.to_s.gsub(/[\/:]/, '_').gsub(/_+/, '_')}"
162
186
  File.open(debug_filename, 'wb') { |f| f << @response.body } if @debug
163
- p "#{debug_initial_word} HTML > #{debug_filename}"
187
+ p "#{debug_initial_word} <- [html_file] = #{debug_filename}"
164
188
  end
165
189
  @response
166
190
  end
@@ -173,7 +197,7 @@ class TinyGrabber::Agent
173
197
  def set_uri url
174
198
  # It's magic work with escaped url
175
199
  @uri = URI(URI.escape(URI.unescape(url)))
176
- p "#{debug_initial_word} URI = #{@uri}" if @debug
200
+ p "#{debug_initial_word} -> [uri] = #{@uri}" if @debug
177
201
  end
178
202
 
179
203
 
@@ -181,7 +205,7 @@ class TinyGrabber::Agent
181
205
  #
182
206
  def set_user_agent
183
207
  @headers['User-Agent'] = @user_agent
184
- p "#{debug_initial_word} user_agent = #{@user_agent}" if @debug
208
+ p "#{debug_initial_word} -> [user_agent] = #{@user_agent}" if @debug
185
209
  end
186
210
 
187
211
 
@@ -189,7 +213,7 @@ class TinyGrabber::Agent
189
213
  #
190
214
  def set_basic_auth
191
215
  @request.basic_auth @basic_auth[:username], @basic_auth[:password]
192
- p "#{debug_initial_word} basic_auth = #{@basic_auth}" if @debug
216
+ p "#{debug_initial_word} -> [basic_auth] = #{@basic_auth}" if @debug
193
217
  end
194
218
 
195
219
 
@@ -197,7 +221,7 @@ class TinyGrabber::Agent
197
221
  #
198
222
  def set_headers
199
223
  @headers.each { |k, v| @request.add_field(String(k), v) }
200
- p "#{debug_initial_word} headers = #{@headers}" if @debug
224
+ p "#{debug_initial_word} -> [headers] = #{@headers}" if @debug
201
225
  end
202
226
 
203
227
 
@@ -205,7 +229,7 @@ class TinyGrabber::Agent
205
229
  #
206
230
  def set_cookies
207
231
  @request['Cookie'] = @cookies
208
- p "#{debug_initial_word} cookies = #{@cookies}" if @debug
232
+ p "#{debug_initial_word} -> [cookies] = #{@cookies}" if @debug
209
233
  end
210
234
 
211
235
 
@@ -215,7 +239,7 @@ class TinyGrabber::Agent
215
239
  def send_request
216
240
  @http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https') do |http|
217
241
  http.read_timeout = @read_timeout
218
- p "#{debug_initial_word} read_timeout = #{@read_timeout}" if @debug
242
+ p "#{debug_initial_word} -> [read_timeout] = #{@read_timeout}" if @debug
219
243
  http.request(@request)
220
244
  end
221
245
  end
@@ -225,7 +249,9 @@ class TinyGrabber::Agent
225
249
  #
226
250
  def save_headers
227
251
  @headers = @response.headers
228
- p "#{debug_initial_word} save_headers = #{@headers}" if @debug
252
+ # Delete header TRANSFER_ENCODING for chain of requests
253
+ @headers.delete('transfer-encoding')
254
+ p "#{debug_initial_word} <- [headers] = #{@headers}" if @debug
229
255
  end
230
256
 
231
257
 
@@ -233,7 +259,15 @@ class TinyGrabber::Agent
233
259
  #
234
260
  def save_cookies
235
261
  @cookies = @response.cookies
236
- p "#{debug_initial_word} save_cookies = #{@cookies}" if @debug
262
+ p "#{debug_initial_word} <- [cookies] = #{@cookies}" if @debug
263
+ end
264
+
265
+
266
+ # Clears headers and cookies
267
+ #
268
+ def reset
269
+ @headers = {}
270
+ @cookies = nil
237
271
  end
238
272
 
239
273
 
@@ -1,4 +1,4 @@
1
1
  class TinyGrabber
2
2
  # Version number
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.1"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiny_grabber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandr Chernyshev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-02 00:00:00.000000000 Z
11
+ date: 2016-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: socksify