tiny_grabber 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99211b6e18440ea58661cf5b5a5bdadec631f942
4
- data.tar.gz: 1e78bd2089d65949431b519ae7a15b3f4bba202f
3
+ metadata.gz: c48375a5210123b907c6ebe2a7911f1c7090188a
4
+ data.tar.gz: 19b5052e7e79e876a40368d9e061a9aba032b7b4
5
5
  SHA512:
6
- metadata.gz: 3a4dc8a114ea9818dee2bd400ccc5e45c60ae40f2a9e7ce5002c51b214b9e471d1066f2d865e48ab0acbe91e80c51d55cb17ed43a7eed29e01283f82c61c09b8
7
- data.tar.gz: 4ec0f357261f447f3cd34dda16a9376dd770a73a9c7fc30b22577f42de1b4a33bd557b5c8da85ce54a5d40d780af4e152ebbd93b0d50097fbf0c7081d134f30e
6
+ metadata.gz: 498cffe418cabb9917dad706d8df5d6281fcc119418016dcd775169346fd5108fcdbf0af5c8d75cbb1472acfdef96d4bc82e037cfc339de729fd864f1dcdafc2
7
+ data.tar.gz: 582d3bf2fa5cde9a54c2862d5f5d237218d483f69dadd8749eba93a07ab58a17ecfd1d40aaac5197823a155363bd51ae1196f5de033c33239432cd7b76d281b6
data/README.md CHANGED
@@ -28,7 +28,7 @@ Or install it yourself as:
28
28
  require 'tiny_grabber'
29
29
 
30
30
  read_timeout = 300
31
- user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36'
31
+ user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36'
32
32
  proxy = { ip: 'xx.xx.xx.xx', port: 'xxxx' }
33
33
  headers1 = { 'Content-Type' => 'text/html; charset=utf-8' }
34
34
  headers2 = { 'Content-Type' => 'text/html; charset=utf-8', 'Connection' => 'keep-alive' }
@@ -54,13 +54,16 @@ tg.cookies = cookies
54
54
 
55
55
  # Make response with GET method
56
56
  response = tg.get 'https://whoer.net/ru', headers
57
+ # Reset headers and cookies
58
+ tg.reset
57
59
  # Make response with POST method
58
- response = tg.get 'https://whoer.net/ru', params, headers
60
+ response = tg.post 'https://whoer.net/ru', params, headers
61
+
59
62
 
60
63
  # Make singleton response with GET method
61
- response = TinyGrabber.get 'https://whoer.net/ru', headers, { debug = true, read_timeout = read_timeout ...}
64
+ response = TinyGrabber.get 'https://whoer.net/ru', { debug = true, read_timeout = read_timeout ... }
62
65
  # Make singleton response with POST method
63
- response = TinyGrabber.post 'https://whoer.net/ru', params, headers, { debug = true, read_timeout = read_timeout ...}
66
+ response = TinyGrabber.post 'https://whoer.net/ru', params, { debug = true, read_timeout = read_timeout ... }
64
67
 
65
68
  # Get Nokogiri object from response HTML
66
69
  ng = response.ng
@@ -70,11 +73,18 @@ response.code
70
73
  response.cookies
71
74
  # Get response headers
72
75
  response.headers
73
-
76
+ # Get response HTML
77
+ response.body
74
78
  ```
75
79
 
76
80
  ## Changelog
77
81
 
82
+ * *v 0.2.1*
83
+ * Set a random user_agent from the list if none is set
84
+ * Remove headers attribute from singleton methods
85
+ * Remove header transfer-encoding for chain requests
86
+ * Add reset method to delete headers and cookies
87
+
78
88
  * *v 0.2.0*
79
89
  * Now there is an opportunity to create object TinyGrabber
80
90
  * Change order of parameters for singleton request
data/lib/tiny_grabber.rb CHANGED
@@ -47,9 +47,9 @@ class TinyGrabber
47
47
  # @param url Resource link
48
48
  # @param config Agent configuration options
49
49
  #
50
- def self.get url, headers = {}, config = {}
50
+ def self.get url, config = {}
51
51
  initialize config
52
- @agent.fetch url, :get, headers
52
+ @agent.fetch url, :get
53
53
  end
54
54
 
55
55
 
@@ -59,7 +59,7 @@ class TinyGrabber
59
59
  # @param params Request post data
60
60
  # @param headers Request header
61
61
  #
62
- def post url, params = {}, headers = {}, config = {}
62
+ def post url, params = {}, headers = {}
63
63
  @agent.fetch url, :post, headers, params
64
64
  end
65
65
 
@@ -69,9 +69,9 @@ class TinyGrabber
69
69
  # @param url Resource link
70
70
  # @param config Agent configuration options
71
71
  #
72
- def self.post url, params = {}, headers = {}, config = {}
72
+ def self.post url, params = {}, config = {}
73
73
  initialize config
74
- @agent.fetch url, :post, headers, params
74
+ @agent.fetch url, :post, {}, params
75
75
  end
76
76
 
77
77
 
@@ -173,7 +173,9 @@ class TinyGrabber
173
173
  @agent.cookies = cookies
174
174
  end
175
175
 
176
-
177
-
178
-
176
+ # Call RESET agent method
177
+ #
178
+ def reset
179
+ @agent.reset
180
+ end
179
181
  end
@@ -18,13 +18,37 @@ class TinyGrabber::Agent
18
18
  # Headers
19
19
  attr_accessor :cookies
20
20
 
21
+ # Agent aliases given from http://www.useragentstring.com/pages/Chrome/
22
+ AGENT_ALIASES = [
23
+ # Chrome
24
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
25
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
26
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
27
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
28
+ # Firefox
29
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
30
+ 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
31
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
32
+ 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
33
+ # Internet Explorer
34
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
35
+ 'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
36
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
37
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
38
+ # Opera
39
+ 'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
40
+ 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
41
+ 'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
42
+ 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
43
+ ]
44
+
21
45
  # Initialization object
22
46
  #
23
47
  def initialize
24
48
  @debug = false
25
49
 
26
50
  # Initialize variables agent attributes
27
- @user_agent = nil
51
+ @user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
28
52
  @proxy = []
29
53
  @basic_auth = {}
30
54
  @headers = {}
@@ -126,7 +150,7 @@ class TinyGrabber::Agent
126
150
  if @debug
127
151
  p "#{debug_initial_word} =============================="
128
152
  p "#{debug_initial_word} #{method.upcase} #{url}"
129
- p "#{debug_initial_word} #{params}"
153
+ p "#{debug_initial_word} -> [params] = #{params}"
130
154
  p "#{debug_initial_word} ------------------------------"
131
155
  end
132
156
  set_uri url
@@ -160,7 +184,7 @@ class TinyGrabber::Agent
160
184
  if @debug
161
185
  debug_filename = "log/#{method.upcase}_#{@uri.to_s.gsub(/[\/:]/, '_').gsub(/_+/, '_')}"
162
186
  File.open(debug_filename, 'wb') { |f| f << @response.body } if @debug
163
- p "#{debug_initial_word} HTML > #{debug_filename}"
187
+ p "#{debug_initial_word} <- [html_file] = #{debug_filename}"
164
188
  end
165
189
  @response
166
190
  end
@@ -173,7 +197,7 @@ class TinyGrabber::Agent
173
197
  def set_uri url
174
198
  # It's magic work with escaped url
175
199
  @uri = URI(URI.escape(URI.unescape(url)))
176
- p "#{debug_initial_word} URI = #{@uri}" if @debug
200
+ p "#{debug_initial_word} -> [uri] = #{@uri}" if @debug
177
201
  end
178
202
 
179
203
 
@@ -181,7 +205,7 @@ class TinyGrabber::Agent
181
205
  #
182
206
  def set_user_agent
183
207
  @headers['User-Agent'] = @user_agent
184
- p "#{debug_initial_word} user_agent = #{@user_agent}" if @debug
208
+ p "#{debug_initial_word} -> [user_agent] = #{@user_agent}" if @debug
185
209
  end
186
210
 
187
211
 
@@ -189,7 +213,7 @@ class TinyGrabber::Agent
189
213
  #
190
214
  def set_basic_auth
191
215
  @request.basic_auth @basic_auth[:username], @basic_auth[:password]
192
- p "#{debug_initial_word} basic_auth = #{@basic_auth}" if @debug
216
+ p "#{debug_initial_word} -> [basic_auth] = #{@basic_auth}" if @debug
193
217
  end
194
218
 
195
219
 
@@ -197,7 +221,7 @@ class TinyGrabber::Agent
197
221
  #
198
222
  def set_headers
199
223
  @headers.each { |k, v| @request.add_field(String(k), v) }
200
- p "#{debug_initial_word} headers = #{@headers}" if @debug
224
+ p "#{debug_initial_word} -> [headers] = #{@headers}" if @debug
201
225
  end
202
226
 
203
227
 
@@ -205,7 +229,7 @@ class TinyGrabber::Agent
205
229
  #
206
230
  def set_cookies
207
231
  @request['Cookie'] = @cookies
208
- p "#{debug_initial_word} cookies = #{@cookies}" if @debug
232
+ p "#{debug_initial_word} -> [cookies] = #{@cookies}" if @debug
209
233
  end
210
234
 
211
235
 
@@ -215,7 +239,7 @@ class TinyGrabber::Agent
215
239
  def send_request
216
240
  @http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https') do |http|
217
241
  http.read_timeout = @read_timeout
218
- p "#{debug_initial_word} read_timeout = #{@read_timeout}" if @debug
242
+ p "#{debug_initial_word} -> [read_timeout] = #{@read_timeout}" if @debug
219
243
  http.request(@request)
220
244
  end
221
245
  end
@@ -225,7 +249,9 @@ class TinyGrabber::Agent
225
249
  #
226
250
  def save_headers
227
251
  @headers = @response.headers
228
- p "#{debug_initial_word} save_headers = #{@headers}" if @debug
252
+ # Delete header TRANSFER_ENCODING for chain of requests
253
+ @headers.delete('transfer-encoding')
254
+ p "#{debug_initial_word} <- [headers] = #{@headers}" if @debug
229
255
  end
230
256
 
231
257
 
@@ -233,7 +259,15 @@ class TinyGrabber::Agent
233
259
  #
234
260
  def save_cookies
235
261
  @cookies = @response.cookies
236
- p "#{debug_initial_word} save_cookies = #{@cookies}" if @debug
262
+ p "#{debug_initial_word} <- [cookies] = #{@cookies}" if @debug
263
+ end
264
+
265
+
266
+ # Clears headers and cookies
267
+ #
268
+ def reset
269
+ @headers = {}
270
+ @cookies = nil
237
271
  end
238
272
 
239
273
 
@@ -1,4 +1,4 @@
1
1
  class TinyGrabber
2
2
  # Version number
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.1"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiny_grabber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandr Chernyshev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-02 00:00:00.000000000 Z
11
+ date: 2016-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: socksify