tiny_grabber 0.2.8 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51f1a586c038410a555be4a5d046134f92786f75
4
- data.tar.gz: 0bdb153926a9553276d06f7c40666d366fcbf0a6
3
+ metadata.gz: 96bdccdeb24ccdbbb99e4cb2e73bf1450aca4e4f
4
+ data.tar.gz: 785bed54953a6faa41325aa8d8dda56f688b638d
5
5
  SHA512:
6
- metadata.gz: d9e986b006f6734e043ddedd168bbc66925ed97380b18e1d1651f0dfedcd9c36caccccd13eb3fb363d5b292ada62c2c41209cd862649552cfaf175c7bb732b6e
7
- data.tar.gz: a849cf33106b8a119da43e4bd05dbc3e1cd592ab78947e7d8868d2cc2c2b7e4043dcd03678c56ed12ae91e98b4a26e746f4cd51803ab6cd3c3d8c926433ac0c7
6
+ metadata.gz: 4256e04d8e42b404c09e32ad66bd99c4c1f38680be021fcb6e36e6e8a1e86e65be548945d853583fad0177d53e2732a275ec804079531757d477af91bd403674
7
+ data.tar.gz: 46f84bf20a8c0e4de69e3bea855c5d3a26bd66332c75fbb7e2e6ed1805364c05262e10d897f4ea458931ac842819baee55f0a3c4cd7ed731ce7207c2bdd8d149
data/Gemfile CHANGED
@@ -2,3 +2,7 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in tiny_grabber.gemspec
4
4
  gemspec
5
+
6
+ gem 'rubocop', require: false
7
+ gem 'byebug'
8
+ gem 'timezone', '~> 1.0'
data/README.md CHANGED
@@ -126,6 +126,11 @@ response.body
126
126
 
127
127
  ## Changelog
128
128
 
129
+ * *v 0.2.9*
130
+ * Added the `follow_location` agent attribute for following redirects
131
+ * Redirects are followed using the HTTP 302 response code and the `Location` header
132
+ * Followed the URL from a `meta refresh` tag
133
+ * Refactored code for rubocop
129
134
  * *v 0.2.8*
130
135
  * Added processing Accept headers
131
136
  * *v 0.2.7*
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
@@ -2,334 +2,340 @@
2
2
  # Initialize connect with Resource
3
3
  # Setting connect attributes
4
4
  #
5
- class TinyGrabber::Agent
6
- # Debug configuration
7
- attr_accessor :debug
8
- # Max time to execute request
9
- attr_accessor :read_timeout
10
- # Web browser name
11
- attr_accessor :user_agent
12
- # Remote proxy configuration
13
- attr_accessor :proxy
14
- # Basic authentification configuration
15
- attr_accessor :basic_auth
16
- # Headers
17
- attr_accessor :headers
18
- # Headers
19
- attr_accessor :cookies
20
-
21
- # Agent aliases given from http://www.useragentstring.com/pages/Chrome/
22
- AGENT_ALIASES = [
23
- # Chrome
24
- 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
25
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
26
- 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
27
- 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
28
- # Firefox
29
- 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
30
- 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
31
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
32
- 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
33
- # Internet Explorer
34
- 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
35
- 'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
36
- 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
37
- 'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
38
- # Opera
39
- 'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
40
- 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
41
- 'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
42
- 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
43
- ]
44
-
45
- # Initialization object
46
- #
47
- def initialize
48
- @debug = Debug.new
49
-
50
- # Initialize variables agent attributes
51
- @user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
52
- @proxy = []
53
- @basic_auth = {}
54
- @headers = {}
55
- @cookies = nil
56
- @read_timeout = 10
57
- # Initialize variable for URI object
58
- @uri = nil
59
- # Initialize variable for Net::HTTP request object
60
- @http = Net::HTTP
61
- # Initialize variable for Net::HTTP response object
62
- @response = nil
63
- @verify_mode = OpenSSL::SSL::VERIFY_NONE
64
- end
65
-
66
-
67
- # Set debug configuration
68
- #
69
- # @param debug
70
- #
71
- def debug= debug
72
- debug = var_to_sym(debug, true)
73
- if debug.is_a?(Hash)
74
- @debug.active = debug[:active]
75
- @debug.destination = debug[:destination]
76
- @debug.save_html = debug[:save_html]
77
- elsif debug.is_a?(TrueClass)
78
- @debug.active = true
5
+ class TinyGrabber
6
+ class Agent
7
+ # Debug configuration
8
+ attr_writer :debug
9
+ # Max time to execute request
10
+ attr_writer :read_timeout
11
+ # Web browser name
12
+ attr_writer :user_agent
13
+ # Remote proxy configuration
14
+ attr_accessor :proxy
15
+ # Basic authentication configuration
16
+ attr_writer :basic_auth
17
+ # Headers
18
+ attr_writer :headers
19
+ # Cookies
20
+ attr_writer :cookies
21
+ # Set verify mode
22
+ attr_writer :verify_mode
23
+ # Follow location
24
+ attr_writer :follow_location
25
+
26
+ # Agent aliases given from http://www.useragentstring.com/pages/Chrome/
27
+ AGENT_ALIASES = [
28
+ # Chrome
29
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
30
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
31
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
32
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
33
+ # Firefox
34
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
35
+ 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
36
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
37
+ 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
38
+ # Internet Explorer
39
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
40
+ 'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
41
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
42
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
43
+ # Opera
44
+ 'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
45
+ 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
46
+ 'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
47
+ 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52'
48
+ ].freeze
49
+
50
+ # Initialization object
51
+ #
52
+ def initialize
53
+ @debug = Debug.new
54
+
55
+ # Initialize variables agent attributes
56
+ @user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
57
+ @proxy = []
58
+ @basic_auth = {}
59
+ @headers = {}
60
+ @cookies = nil
61
+ @follow_location = false
62
+ @read_timeout = 10
63
+ # Initialize variable for URI object
64
+ @uri = nil
65
+ # Initialize variable for Net::HTTP request object
66
+ @http = Net::HTTP
67
+ # Initialize variable for Net::HTTP response object
68
+ @response = nil
69
+ @verify_mode = OpenSSL::SSL::VERIFY_NONE
79
70
  end
80
- end
81
-
82
71
 
83
- # Set READ_TIMEOUT agent attribute
84
- #
85
- # @param read_timeout Waiting time to reading
86
- #
87
- def read_timeout= read_timeout
88
- fail 'attribute read_timeout must be Integer' unless read_timeout.is_a?(Integer)
89
- @read_timeout = read_timeout
90
- end
91
-
92
-
93
- # Set USER_AGENT agent attribute
94
- #
95
- # @param user_agent Web browser name
96
- #
97
- def user_agent= user_agent
98
- fail 'attribute user_agent must be String' unless user_agent.is_a?(String)
99
- @user_agent = user_agent
100
- end
101
-
102
-
103
- # Initialize Net::HTTP connection through proxy provider
104
- # TYPE attribute distribute proxy type on SOCKS4(5) and HTTP(s)
105
- #
106
- # @param proxy Proxy configuration
107
- #
108
- def proxy= proxy
109
- if proxy.is_a?(String)
110
- ip, port, type = proxy.split(':')
111
- fail 'attribute proxy must be in format ip:port' unless ip and port
112
- type ||= :http
113
- proxy = { ip: ip, port: port, type: type }
72
+ # Set debug configuration
73
+ #
74
+ # @param debug
75
+ #
76
+ def debug=(debug)
77
+ debug = var_to_sym(debug, true)
78
+ if debug.is_a?(Hash)
79
+ @debug.active = debug[:active]
80
+ @debug.destination = debug[:destination]
81
+ @debug.save_html = debug[:save_html]
82
+ elsif debug.is_a?(TrueClass)
83
+ @debug.active = true
84
+ end
114
85
  end
115
- proxy = var_to_sym(proxy)
116
- fail 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
117
- fail 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] and proxy[:port]
118
86
 
119
- @proxy = proxy
120
- if [:socks, 'socks'].include? proxy[:type]
121
- @http = Net::HTTP.SOCKSProxy(proxy[:ip].to_s, proxy[:port].to_s)
122
- else
123
- @http = Net::HTTP::Proxy(proxy[:ip], proxy[:port])
87
+ # Set READ_TIMEOUT agent attribute
88
+ #
89
+ # @param read_timeout Waiting time to reading
90
+ #
91
+ def read_timeout=(read_timeout)
92
+ raise 'attribute read_timeout must be Integer' unless read_timeout.is_a?(Integer)
93
+ @read_timeout = read_timeout
124
94
  end
125
- end
126
-
127
-
128
- # Set BASIC_AUTH agent attribute
129
- #
130
- # @param basic_auth Authentification configuration
131
- #
132
- def basic_auth= basic_auth
133
- basic_auth = var_to_sym(basic_auth)
134
- fail 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
135
- fail 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] and basic_auth[:password]
136
- @basic_auth = basic_auth
137
- end
138
-
139
-
140
- # Set HEADERS agent attribute
141
- #
142
- # @param headers Request headers
143
- #
144
- def headers= headers
145
- fail 'attribute headers must be Hash' unless headers.is_a?(Hash)
146
- @headers = headers
147
- end
148
95
 
96
+ # Set USER_AGENT agent attribute
97
+ #
98
+ # @param user_agent Web browser name
99
+ #
100
+ def user_agent=(user_agent)
101
+ raise 'attribute user_agent must be String' unless user_agent.is_a?(String)
102
+ @user_agent = user_agent
103
+ end
149
104
 
150
- # Set COOKIES agent attribute
151
- #
152
- # @param cookies Request cookies
153
- #
154
- def cookies= cookies
155
- cookies = var_to_sym(cookies)
156
- cookies = cookies.to_a.map { |x| "#{x[0]}=#{x[1]}" }.join('&') if cookies.is_a?(Hash)
157
- fail 'attribute cookies must be String' unless cookies.is_a?(String)
158
- @cookies = cookies
159
- end
105
+ # Initialize Net::HTTP connection through proxy provider
106
+ # The :type key selects a SOCKS proxy; any other value uses an HTTP(S) proxy
107
+ #
108
+ # @param proxy Proxy configuration
109
+ #
110
+ def proxy=(proxy)
111
+ if proxy.is_a?(String)
112
+ ip, port, type = proxy.split(':')
113
+ raise 'attribute proxy must be in format ip:port' unless ip && port
114
+ type ||= :http
115
+ proxy = { ip: ip, port: port, type: type }
116
+ end
117
+ proxy = var_to_sym(proxy)
118
+ raise 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
119
+ raise 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] && proxy[:port]
120
+
121
+ @proxy = proxy
122
+ @http = if [:socks, 'socks'].include? proxy[:type]
123
+ Net::HTTP.SOCKSProxy(proxy[:ip].to_s, proxy[:port].to_s)
124
+ else
125
+ Net::HTTP::Proxy(proxy[:ip], proxy[:port])
126
+ end
127
+ end
160
128
 
129
+ # Set BASIC_AUTH agent attribute
130
+ #
131
+ # @param basic_auth Authentication configuration
132
+ #
133
+ def basic_auth=(basic_auth)
134
+ basic_auth = var_to_sym(basic_auth)
135
+ raise 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
136
+ raise 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] && basic_auth[:password]
137
+ @basic_auth = basic_auth
138
+ end
161
139
 
162
- # Set verify_mode
163
- #
164
- # @param verify_mode SSL verify_mode
165
- #
166
- def verify_mode= verify_mode
167
- @verify_mode = verify_mode
168
- end
140
+ # Set HEADERS agent attribute
141
+ #
142
+ # @param headers Request headers
143
+ #
144
+ def headers=(headers)
145
+ raise 'attribute headers must be Hash' unless headers.is_a?(Hash)
146
+ @headers = headers
147
+ end
169
148
 
149
+ # Set COOKIES agent attribute
150
+ #
151
+ # @param cookies Request cookies
152
+ #
153
+ def cookies=(cookies)
154
+ cookies = var_to_sym(cookies)
155
+ cookies = cookies.to_a.map { |x| "#{x[0]}=#{x[1]}" }.join('&') if cookies.is_a?(Hash)
156
+ raise 'attribute cookies must be String' unless cookies.is_a?(String)
157
+ @cookies = cookies
158
+ end
170
159
 
171
- # Fetch request for GET and POST HTTP methods
172
- # Setting USER_AGENT, BASIC_AUTH, HEADERS, COOKIES request attribute
173
- # Make response and save COOKIES for next requests
174
- #
175
- # @param url Resource link
176
- # @param method Request method
177
- # @param headers Request header
178
- # @param params Request additional params
179
- #
180
- def fetch url, method = :get, headers = {}, params = {}
181
- if @debug.active
182
- @debug.save '=============================='
183
- @debug.save "#{method.upcase} #{url}"
184
- @debug.save "-> [proxy] = #{@proxy}" if @proxy
185
- @debug.save "-> [params] = #{params}"
186
- @debug.save '------------------------------'
160
+ # Set verify_mode
161
+ #
162
+ # @param verify_mode SSL verify_mode
163
+ #
164
+ # def verify_mode=(verify_mode)
165
+ # @verify_mode = verify_mode
166
+ # end
167
+
168
+ # Init follow location for redirect
169
+ #
170
+ # @param follow_location Follow location flag
171
+ #
172
+ def follow_location=(follow_location)
173
+ raise 'attribute follow_location must be Boolean' unless follow_location.is_a?(TrueClass) || follow_location.is_a?(FalseClass)
174
+ @follow_location = follow_location
187
175
  end
188
- set_uri url
189
- case method
176
+
177
+ # Fetch request for GET and POST HTTP methods
178
+ # Setting USER_AGENT, BASIC_AUTH, HEADERS, COOKIES request attribute
179
+ # Make response and save COOKIES for next requests
180
+ #
181
+ # @param url Resource link
182
+ # @param method Request method
183
+ # @param headers Request header
184
+ # @param params Request additional params
185
+ #
186
+ def fetch(url, method = :get, headers = {}, params = {})
187
+ if @debug.active
188
+ @debug.save '=============================='
189
+ @debug.save "#{method.upcase} #{url}"
190
+ @debug.save "-> [proxy] = #{@proxy}" if @proxy
191
+ @debug.save "-> [params] = #{params}"
192
+ @debug.save '------------------------------'
193
+ end
194
+ convert_to_uri url
195
+ case method
190
196
  when :get
191
197
  @request = Net::HTTP::Get.new(@uri.request_uri)
192
198
  when :post
193
199
  @request = Net::HTTP::Post.new(@uri.request_uri)
194
200
  @request.set_form_data(params)
195
- end
196
- set_user_agent if @user_agent
197
- set_basic_auth unless @basic_auth.empty?
198
- @headers = headers unless headers.empty?
199
- set_headers unless @headers.empty?
200
- set_cookies if @cookies
201
- @response = send_request
202
- case @response
201
+ end
202
+ set_user_agent if @user_agent
203
+ set_basic_auth unless @basic_auth.empty?
204
+ @headers = headers unless headers.empty?
205
+ set_headers unless @headers.empty?
206
+ set_cookies if @cookies
207
+ @response = send_request
208
+ case @response
203
209
  # HTTP response code 1xx
204
210
  when Net::HTTPInformation
205
- @debug.save "<- [response] = Net::HTTPInformation" if @debug.active
211
+ @debug.save '<- [response] = Net::HTTPInformation' if @debug.active
206
212
  # HTTP response code 2xx
207
213
  when Net::HTTPSuccess
208
214
  save_headers if @response.header
209
215
  save_cookies if @response.cookies
210
216
  @debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
217
+ # Follow meta refresh
218
+ if @follow_location
219
+ refresh = @response.ng.at_css('meta[http-equiv="refresh"]')
220
+ @response = fetch refresh.attr('content').gsub(/\A.*?(http)/, 'http') if refresh
221
+ end
211
222
  # HTTP response code 3xx
212
223
  when Net::HTTPRedirection
213
224
  @debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
225
+ @debug.save 'try curl user_agent: tg.user_agent=\'curl\''
226
+ # Follow location
227
+ @response = fetch @response.header['Location'] if @follow_location
214
228
  # HTTP response code 4xx
215
229
  when Net::HTTPClientError
216
230
  @debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
217
231
  # HTTP response code 5xx
218
232
  when Net::HTTPServerError
219
233
  @debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
234
+ end
235
+ @debug.save_to_file @response.body if @debug.save_html
236
+ @response
220
237
  end
221
- @debug.save_to_file @response.body if @debug.save_html
222
- @response
223
- end
224
-
225
-
226
- # Initialize URI object from request url
227
- #
228
- # @param url Request link
229
- #
230
- def set_uri url
231
- # It's magic work with escaped url
232
- @uri = URI(URI.escape(URI.unescape(url)))
233
- @debug.save "-> [uri] = #{@uri}" if @debug.active
234
- end
235
-
236
-
237
- # Set USER_AGENT request attribute
238
- #
239
- def set_user_agent
240
- @headers['User-Agent'] = @user_agent
241
- @debug.save "-> [user_agent] = #{@user_agent}" if @debug.active
242
- end
243
238
 
239
+ # Initialize URI object from request url
240
+ #
241
+ # @param url Request link
242
+ #
243
+ def convert_to_uri(url)
244
+ # Unescape first, then escape, so an already-escaped url is not escaped twice
245
+ @uri = URI(URI.escape(URI.unescape(url)))
246
+ @debug.save "-> [uri] = #{@uri}" if @debug.active
247
+ end
244
248
 
245
- # Set BASIC_AUTH request authentification
246
- #
247
- def set_basic_auth
248
- @request.basic_auth @basic_auth[:username], @basic_auth[:password]
249
- @debug.save "-> [basic_auth] = #{@basic_auth}" if @debug.active
250
- end
249
+ # Set USER_AGENT request attribute
250
+ #
251
+ def set_user_agent
252
+ @headers['User-Agent'] = @user_agent
253
+ @debug.save "-> [user_agent] = #{@user_agent}" if @debug.active
254
+ end
251
255
 
256
+ # Set BASIC_AUTH request authentication
257
+ #
258
+ def set_basic_auth
259
+ @request.basic_auth @basic_auth[:username], @basic_auth[:password]
260
+ @debug.save "-> [basic_auth] = #{@basic_auth}" if @debug.active
261
+ end
252
262
 
253
- # Set request HEADERS
254
- #
255
- def set_headers
256
- @headers.each do |k, v|
257
- k = String(k)
258
- case k
263
+ # Set request HEADERS
264
+ #
265
+ def set_headers
266
+ @headers.each do |k, v|
267
+ k = String(k)
268
+ case k
259
269
  when 'Accept'
260
270
  @request[k] = v
261
271
  else
262
272
  @request.add_field(k, v)
273
+ end
263
274
  end
275
+ @debug.save "-> [headers] = #{@headers}" if @debug.active
264
276
  end
265
- @debug.save "-> [headers] = #{@headers}" if @debug.active
266
- end
267
-
268
277
 
269
- # Set request COOKIES
270
- #
271
- def set_cookies
272
- @request['Cookie'] = @cookies
273
- @debug.save "-> [cookies] = #{@cookies}" if @debug.active
274
- end
275
-
276
-
277
- # Send request and get response
278
- # Use SSL connect for HTTPS link scheme
279
- #
280
- def send_request
281
- @http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https', verify_mode: @verify_mode, read_timeout: @read_timeout) do |http|
282
- @debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
283
- http.request(@request)
278
+ # Set request COOKIES
279
+ #
280
+ def set_cookies
281
+ @request['Cookie'] = @cookies
282
+ @debug.save "-> [cookies] = #{@cookies}" if @debug.active
284
283
  end
285
- end
286
-
287
-
288
- # Save response headers in agent attribute
289
- #
290
- def save_headers
291
- @headers = @response.headers
292
- # Delete header TRANSFER_ENCODING for chain of requests
293
- @headers.delete('transfer-encoding')
294
- @debug.save "<- [headers] = #{@headers}" if @debug.active
295
- end
296
284
 
285
+ # Send request and get response
286
+ # Use SSL connect for HTTPS link scheme
287
+ #
288
+ def send_request
289
+ @http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https', verify_mode: @verify_mode, read_timeout: @read_timeout) do |http|
290
+ @debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
291
+ http.request(@request)
292
+ end
293
+ end
297
294
 
298
- # Save response cookies in agent attribute
299
- #
300
- def save_cookies
301
- @cookies = @response.cookies
302
- @debug.save "<- [cookies] = #{@cookies}" if @debug.active
303
- end
295
+ # Save response headers in agent attribute
296
+ #
297
+ def save_headers
298
+ @headers = @response.headers
299
+ # Delete header TRANSFER_ENCODING for chain of requests
300
+ @headers.delete('transfer-encoding')
301
+ @debug.save "<- [headers] = #{@headers}" if @debug.active
302
+ end
304
303
 
304
+ # Save response cookies in agent attribute
305
+ #
306
+ def save_cookies
307
+ @cookies = @response.cookies
308
+ @debug.save "<- [cookies] = #{@cookies}" if @debug.active
309
+ end
305
310
 
306
- # Clears headers and cookies
307
- #
308
- def reset
309
- @headers = {}
310
- @cookies = nil
311
- end
311
+ # Clears headers and cookies
312
+ #
313
+ def reset
314
+ @headers = {}
315
+ @cookies = nil
316
+ end
312
317
 
313
- # Convert variables and contains to symbol
314
- #
315
- # @param var Variable need to convert
316
- #
317
- def var_to_sym var, str_to_sym = false
318
- if var.is_a?(Hash)
319
- result = {}
320
- var.each do |k, v|
321
- result[k.to_sym] = var_to_sym(v, str_to_sym)
322
- end
323
- elsif var.is_a?(Array)
324
- result = []
325
- var.each do |v|
326
- result << var_to_sym(v, str_to_sym)
318
+ # Recursively convert Hash keys (and, when str_to_sym is true, String values) to symbols
319
+ #
320
+ # @param var Variable need to convert
321
+ #
322
+ def var_to_sym(var, str_to_sym = false)
323
+ if var.is_a?(Hash)
324
+ result = {}
325
+ var.each do |k, v|
326
+ result[k.to_sym] = var_to_sym(v, str_to_sym)
327
+ end
328
+ elsif var.is_a?(Array)
329
+ result = []
330
+ var.each do |v|
331
+ result << var_to_sym(v, str_to_sym)
332
+ end
333
+ elsif var.is_a?(String)
334
+ result = str_to_sym ? var.to_sym : var
335
+ else
336
+ result = var
327
337
  end
328
- elsif var.is_a?(String)
329
- result = str_to_sym ? var.to_sym : var
330
- else
331
- result = var
338
+ result
332
339
  end
333
- result
334
340
  end
335
- end
341
+ end
@@ -15,76 +15,30 @@ class Debug
15
15
  @save_html = false
16
16
  end
17
17
 
18
-
19
- # Set debug active flag
20
- #
21
- # @param active Flag
22
- #
23
- def active= active
24
- @active = active
25
- end
26
-
27
-
28
- # Get debug active flag
29
- def active
30
- @active
31
- end
32
-
33
-
34
- # Set debug destination
35
- #
36
- # @param destination Save log to file or print
37
- #
38
- def destination= destination
39
- @destination = destination
40
- end
41
-
42
-
43
- # Get debug destination
44
- #
45
- def destination
46
- @destination
47
- end
48
-
49
-
50
- # Set debug flag to save response HTML to file
51
- #
52
- # @param save_html Flag
53
- def save_html= save_html
54
- @save_html = save_html
55
- end
56
-
57
-
58
- # Get debug flag to save response HTML to file
59
- #
60
- def save_html
61
- @save_html
62
- end
63
-
64
-
65
18
  # Save log information
66
19
  #
67
20
  # @param message Message body
68
21
  #
69
- def save message
22
+ def save(message)
70
23
  message = "TG | #{Time.now.strftime('%Y%m%d-%H%M%S')} | #{message}"
71
24
  case @destination
72
- when :file
73
- save_to_file message
74
- when :print
75
- p message
25
+ when :file
26
+ save_to_file message
27
+ when :print
28
+ p message
76
29
  end
77
30
  end
78
31
 
79
-
80
32
  # Save log information to file
81
33
  #
82
34
  # @param message Message body
83
35
  #
84
- def save_to_file message
36
+ def save_to_file(message)
37
+ # Mark the message as UTF-8 (force_encoding relabels the bytes, it does not transcode)
38
+ message = message.force_encoding('utf-8')
85
39
  debug_path = "#{Dir.pwd}/log"
86
- Dir.mkdir(debug_path, 0775) unless File.exists? debug_path
40
+ Dir.mkdir(debug_path, 0o775) unless File.exist? debug_path
87
41
  filename = "#{Time.now.strftime('%Y%m%d')}.log"
88
42
  File.open("#{debug_path}/#{filename}", 'a+') { |f| f << "#{message}\r\n" }
89
43
  end
90
- end
44
+ end
@@ -4,29 +4,27 @@ require 'nokogiri'
4
4
  module Net
5
5
  # Success response class
6
6
  class HTTPOK
7
-
8
7
  # Nokogiri object of response
9
8
  #
10
9
  def ng
11
- Nokogiri::HTML(self.body)
10
+ Nokogiri::HTML(body)
12
11
  end
13
12
 
14
13
  # Response Cookies
15
14
  #
16
15
  def cookies
17
- cookies = self.get_fields('set-cookie')
16
+ cookies = get_fields('set-cookie')
18
17
  if cookies
19
18
  cookies.map { |cookie| cookie.gsub(/\A([^;]+).*\Z/, '\1') }.join('&')
20
- else
21
- nil
22
19
  end
23
20
  end
24
21
 
25
-
26
22
  # Response Headers
27
23
  #
28
24
  def headers
29
- self.header.to_hash.inject({}) { |headers, (header_key, header_value)| headers[header_key] = header_value.first; headers }
25
+ header.to_hash.each_with_object({}) do |header_key, header_value|
26
+ header_value[header_key] = header_value.first
27
+ end
30
28
  end
31
29
  end
32
- end
30
+ end
@@ -1,4 +1,4 @@
1
1
  class TinyGrabber
2
2
  # Version number
3
- VERSION = '0.2.8'
3
+ VERSION = '0.3.0'.freeze
4
4
  end
data/lib/tiny_grabber.rb CHANGED
@@ -10,17 +10,15 @@ require 'tiny_grabber/http'
10
10
  # Main class for TinyGrabber
11
11
  #
12
12
  class TinyGrabber
13
-
14
13
  # Initialize a new TinyGrabber user agent.
15
14
  #
16
15
  def initialize
17
16
  @agent = TinyGrabber::Agent.new
18
17
  end
19
18
 
20
-
21
19
  # Singleton > Initialize a new TinyGrabber user agent.
22
20
  #
23
- def self.initialize config = {}
21
+ def self.initialize(config = {})
24
22
  @agent = TinyGrabber::Agent.new
25
23
 
26
24
  @agent.debug = config[:debug] if config[:debug]
@@ -32,13 +30,12 @@ class TinyGrabber
32
30
  @agent.cookies = config[:cookies] if config[:cookies]
33
31
  end
34
32
 
35
-
36
33
  # HTTP::GET request
37
34
  #
38
35
  # @param url Resource link
39
36
  # @param headers Request header
40
37
  #
41
- def get url, headers = {}
38
+ def get(url, headers = {})
42
39
  @agent.fetch url, :get, headers
43
40
  end
44
41
 
@@ -47,129 +44,115 @@ class TinyGrabber
47
44
  # @param url Resource link
48
45
  # @param headers Request header
49
46
  #
50
- def self.get url, config = {}
51
- initialize config
47
+ def self.get(url, config = {})
48
+ initialize config
52
49
  @agent.fetch url, :get
53
50
  end
54
51
 
55
-
56
52
  # HTTP::POST request
57
53
  #
58
54
  # @param url Resource link
59
55
  # @param params Request post data
60
56
  # @param headers Request header
61
57
  #
62
- def post url, params = {}, headers = {}
58
+ def post(url, params = {}, headers = {})
63
59
  @agent.fetch url, :post, headers, params
64
60
  end
65
61
 
66
-
67
62
  # Singleton > HTTP::POST request
68
63
  #
69
64
  # @param url Resource link
70
65
  # @param headers Request header
71
66
  #
72
- def self.post url, params = {}, config = {}
73
- initialize config
67
+ def self.post(url, params = {}, config = {})
68
+ initialize config
74
69
  @agent.fetch url, :post, {}, params
75
70
  end
76
71
 
77
-
78
72
  # Set DEBUG flag
79
73
  #
80
74
  # @param debug Flag to start debug
81
75
  #
82
- def debug= debug
76
+ def debug=(debug)
83
77
  @agent.debug = debug
84
78
  end
85
79
 
86
-
87
80
  # Read READ_TIMEOUT agent attribute
88
81
  #
89
82
  def read_timeout
90
83
  @agent.read_timeout
91
84
  end
92
85
 
93
-
94
86
  # Set READ_TIMEOUT agent attribute
95
87
  #
96
88
  # @param read_timeout Waiting time to reading
97
89
  #
98
- def read_timeout= read_timeout
90
+ def read_timeout=(read_timeout)
99
91
  @agent.read_timeout = read_timeout
100
92
  end
101
93
 
102
-
103
94
  # Read USER_AGENT agent attribute
104
95
  #
105
96
  def user_agent
106
97
  @agent.user_agent
107
98
  end
108
99
 
109
-
110
100
  # Set USER_AGENT agent attribute
111
101
  #
112
102
  # @param user_agent Web browser name
113
103
  #
114
- def user_agent= user_agent
104
+ def user_agent=(user_agent)
115
105
  @agent.user_agent = user_agent
116
106
  end
117
107
 
118
-
119
108
  # Read PROXY agent attribute
120
109
  #
121
110
  def proxy
122
111
  @agent.proxy
123
112
  end
124
113
 
125
-
126
114
  # Set PROXY agent attribute
127
115
  #
128
116
  # @param proxy Proxy configuration
129
117
  #
130
- def proxy= proxy
118
+ def proxy=(proxy)
131
119
  @agent.proxy = proxy
132
120
  end
133
121
 
134
-
135
122
  # Set BASIC_AUTH agent attribute
136
123
  #
137
124
  # @param username Authentication username
138
125
  # @param password Authentication password
139
126
  #
140
- def basic_auth username, password
127
+ def basic_auth(username, password)
141
128
  @agent.basic_auth = { username: username, password: password }
142
129
  end
143
130
 
144
-
145
131
  # Read HEADERS agent attribute
146
132
  #
147
133
  def headers
148
134
  @agent.headers
149
135
  end
150
136
 
151
-
152
137
  # Set HEADERS agent attribute
153
138
  #
154
139
  # @param headers Request headers
155
140
  #
156
- def headers= headers
141
+ def headers=(headers)
157
142
  @agent.headers = headers
158
143
  end
159
144
 
160
-
161
145
  # Read COOKIES agent attribute
162
146
  #
163
147
  def cookies
164
148
  @agent.cookies
165
149
  end
166
150
 
167
-
168
151
  # Set COOKIES agent attribute
169
152
  #
170
153
  # @param cookies Request cookies
171
154
  #
172
- def cookies= cookies
155
+ def cookies=(cookies)
173
156
  @agent.cookies = cookies
174
157
  end
175
158
 
@@ -179,12 +162,19 @@ class TinyGrabber
179
162
  @agent.reset
180
163
  end
181
164
 
182
-
183
165
  # Set verify_mode
184
166
  #
185
167
  # @param verify_mode SSL verify mode
186
168
  #
187
- def verify_mode= verify_mode
169
+ def verify_mode=(verify_mode)
188
170
  @agent.verify_mode = verify_mode
189
171
  end
190
- end
172
+
173
+ # Set follow_location
174
+ #
175
+ # @param follow_location Follow location flag
176
+ #
177
+ def follow_location=(follow_location)
178
+ @agent.follow_location = follow_location
179
+ end
180
+ end
data/tiny_grabber.gemspec CHANGED
@@ -6,11 +6,11 @@ require 'tiny_grabber/version'
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = 'tiny_grabber'
8
8
  spec.version = TinyGrabber::VERSION
9
- spec.authors = ["Aleksandr Chernyshev"]
9
+ spec.authors = ['Aleksandr Chernyshev']
10
10
  spec.email = ['moroznoeytpo@gmail.com']
11
11
 
12
- spec.summary = %q{Tiny grabber}
13
- spec.description = %q{Simple gem for grabbing remote web page.}
12
+ spec.summary = 'Tiny grabber'
13
+ spec.description = 'Simple gem for grabbing remote web page.'
14
14
  spec.homepage = 'https://github.com/moroznoeytpo/tiny_grabber'
15
15
  spec.license = 'MIT'
16
16
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiny_grabber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandr Chernyshev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-04 00:00:00.000000000 Z
11
+ date: 2016-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: socksify