tiny_grabber 0.2.8 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51f1a586c038410a555be4a5d046134f92786f75
4
- data.tar.gz: 0bdb153926a9553276d06f7c40666d366fcbf0a6
3
+ metadata.gz: 96bdccdeb24ccdbbb99e4cb2e73bf1450aca4e4f
4
+ data.tar.gz: 785bed54953a6faa41325aa8d8dda56f688b638d
5
5
  SHA512:
6
- metadata.gz: d9e986b006f6734e043ddedd168bbc66925ed97380b18e1d1651f0dfedcd9c36caccccd13eb3fb363d5b292ada62c2c41209cd862649552cfaf175c7bb732b6e
7
- data.tar.gz: a849cf33106b8a119da43e4bd05dbc3e1cd592ab78947e7d8868d2cc2c2b7e4043dcd03678c56ed12ae91e98b4a26e746f4cd51803ab6cd3c3d8c926433ac0c7
6
+ metadata.gz: 4256e04d8e42b404c09e32ad66bd99c4c1f38680be021fcb6e36e6e8a1e86e65be548945d853583fad0177d53e2732a275ec804079531757d477af91bd403674
7
+ data.tar.gz: 46f84bf20a8c0e4de69e3bea855c5d3a26bd66332c75fbb7e2e6ed1805364c05262e10d897f4ea458931ac842819baee55f0a3c4cd7ed731ce7207c2bdd8d149
data/Gemfile CHANGED
@@ -2,3 +2,7 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in tiny_grabber.gemspec
4
4
  gemspec
5
+
6
+ gem 'rubocop', require: false
7
+ gem 'byebug'
8
+ gem 'timezone', '~> 1.0'
data/README.md CHANGED
@@ -126,6 +126,11 @@ response.body
126
126
 
127
127
  ## Changelog
128
128
 
129
+ * *v 0.2.9*
130
+ * Added agent attribute for redirect follow location
131
+ * Used 302 http answer code and header location for redirecting
132
+ * Used meta refresh url
133
+ * Refactored code for rubocop
129
134
  * *v 0.2.8*
130
135
  * Added processing Accept headers
131
136
  * *v 0.2.7*
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
@@ -2,334 +2,340 @@
2
2
  # Initialize connect with Resource
3
3
  # Setting connect attributes
4
4
  #
5
- class TinyGrabber::Agent
6
- # Debug configuration
7
- attr_accessor :debug
8
- # Max time to execute request
9
- attr_accessor :read_timeout
10
- # Web browser name
11
- attr_accessor :user_agent
12
- # Remote proxy configuration
13
- attr_accessor :proxy
14
- # Basic authentification configuration
15
- attr_accessor :basic_auth
16
- # Headers
17
- attr_accessor :headers
18
- # Headers
19
- attr_accessor :cookies
20
-
21
- # Agent aliases given from http://www.useragentstring.com/pages/Chrome/
22
- AGENT_ALIASES = [
23
- # Chrome
24
- 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
25
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
26
- 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
27
- 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
28
- # Firefox
29
- 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
30
- 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
31
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
32
- 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
33
- # Internet Explorer
34
- 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
35
- 'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
36
- 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
37
- 'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
38
- # Opera
39
- 'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
40
- 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
41
- 'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
42
- 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
43
- ]
44
-
45
- # Initialization object
46
- #
47
- def initialize
48
- @debug = Debug.new
49
-
50
- # Initialize variables agent attributes
51
- @user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
52
- @proxy = []
53
- @basic_auth = {}
54
- @headers = {}
55
- @cookies = nil
56
- @read_timeout = 10
57
- # Initialize variable for URI object
58
- @uri = nil
59
- # Initialize variable for Net::HTTP request object
60
- @http = Net::HTTP
61
- # Initialize variable for Net::HTTP response object
62
- @response = nil
63
- @verify_mode = OpenSSL::SSL::VERIFY_NONE
64
- end
65
-
66
-
67
- # Set debug configuration
68
- #
69
- # @param debug
70
- #
71
- def debug= debug
72
- debug = var_to_sym(debug, true)
73
- if debug.is_a?(Hash)
74
- @debug.active = debug[:active]
75
- @debug.destination = debug[:destination]
76
- @debug.save_html = debug[:save_html]
77
- elsif debug.is_a?(TrueClass)
78
- @debug.active = true
5
+ class TinyGrabber
6
+ class Agent
7
+ # Debug configuration
8
+ attr_writer :debug
9
+ # Max time to execute request
10
+ attr_reader :read_timeout
11
+ # Web browser name
12
+ attr_reader :user_agent
13
+ # Remote proxy configuration
14
+ attr_accessor :proxy
15
+ # Basic authentication configuration
16
+ attr_writer :basic_auth
17
+ # Headers
18
+ attr_reader :headers
19
+ # Cookies
20
+ attr_reader :cookies
21
+ # Set verify mode
22
+ attr_writer :verify_mode
23
+ # Follow location
24
+ attr_writer :follow_location
25
+
26
+ # Agent aliases given from http://www.useragentstring.com/pages/Chrome/
27
+ AGENT_ALIASES = [
28
+ # Chrome
29
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
30
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
31
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
32
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
33
+ # Firefox
34
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
35
+ 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
36
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
37
+ 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
38
+ # Internet Explorer
39
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
40
+ 'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
41
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
42
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
43
+ # Opera
44
+ 'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
45
+ 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
46
+ 'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
47
+ 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52'
48
+ ].freeze
49
+
50
+ # Initialization object
51
+ #
52
+ def initialize
53
+ @debug = Debug.new
54
+
55
+ # Initialize variables agent attributes
56
+ @user_agent = AGENT_ALIASES[rand(AGENT_ALIASES.count) - 1]
57
+ @proxy = []
58
+ @basic_auth = {}
59
+ @headers = {}
60
+ @cookies = nil
61
+ @follow_location = false
62
+ @read_timeout = 10
63
+ # Initialize variable for URI object
64
+ @uri = nil
65
+ # Initialize variable for Net::HTTP request object
66
+ @http = Net::HTTP
67
+ # Initialize variable for Net::HTTP response object
68
+ @response = nil
69
+ @verify_mode = OpenSSL::SSL::VERIFY_NONE
79
70
  end
80
- end
81
-
82
71
 
83
- # Set READ_TIMEOUT agent attribute
84
- #
85
- # @param read_timeout Waiting time to reading
86
- #
87
- def read_timeout= read_timeout
88
- fail 'attribute read_timeout must be Integer' unless read_timeout.is_a?(Integer)
89
- @read_timeout = read_timeout
90
- end
91
-
92
-
93
- # Set USER_AGENT agent attribute
94
- #
95
- # @param user_agent Web browser name
96
- #
97
- def user_agent= user_agent
98
- fail 'attribute user_agent must be String' unless user_agent.is_a?(String)
99
- @user_agent = user_agent
100
- end
101
-
102
-
103
- # Initialize Net::HTTP connection through proxy provider
104
- # TYPE attribute distribute proxy type on SOCKS4(5) and HTTP(s)
105
- #
106
- # @param proxy Proxy configuration
107
- #
108
- def proxy= proxy
109
- if proxy.is_a?(String)
110
- ip, port, type = proxy.split(':')
111
- fail 'attribute proxy must be in format ip:port' unless ip and port
112
- type ||= :http
113
- proxy = { ip: ip, port: port, type: type }
72
+ # Set debug configuration
73
+ #
74
+ # @param debug
75
+ #
76
+ def debug=(debug)
77
+ debug = var_to_sym(debug, true)
78
+ if debug.is_a?(Hash)
79
+ @debug.active = debug[:active]
80
+ @debug.destination = debug[:destination]
81
+ @debug.save_html = debug[:save_html]
82
+ elsif debug.is_a?(TrueClass)
83
+ @debug.active = true
84
+ end
114
85
  end
115
- proxy = var_to_sym(proxy)
116
- fail 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
117
- fail 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] and proxy[:port]
118
86
 
119
- @proxy = proxy
120
- if [:socks, 'socks'].include? proxy[:type]
121
- @http = Net::HTTP.SOCKSProxy(proxy[:ip].to_s, proxy[:port].to_s)
122
- else
123
- @http = Net::HTTP::Proxy(proxy[:ip], proxy[:port])
87
+ # Set READ_TIMEOUT agent attribute
88
+ #
89
+ # @param read_timeout Waiting time to reading
90
+ #
91
+ def read_timeout=(read_timeout)
92
+ raise 'attribute read_timeout must be Integer' unless read_timeout.is_a?(Integer)
93
+ @read_timeout = read_timeout
124
94
  end
125
- end
126
-
127
-
128
- # Set BASIC_AUTH agent attribute
129
- #
130
- # @param basic_auth Authentification configuration
131
- #
132
- def basic_auth= basic_auth
133
- basic_auth = var_to_sym(basic_auth)
134
- fail 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
135
- fail 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] and basic_auth[:password]
136
- @basic_auth = basic_auth
137
- end
138
-
139
-
140
- # Set HEADERS agent attribute
141
- #
142
- # @param headers Request headers
143
- #
144
- def headers= headers
145
- fail 'attribute headers must be Hash' unless headers.is_a?(Hash)
146
- @headers = headers
147
- end
148
95
 
96
+ # Set USER_AGENT agent attribute
97
+ #
98
+ # @param user_agent Web browser name
99
+ #
100
+ def user_agent=(user_agent)
101
+ raise 'attribute user_agent must be String' unless user_agent.is_a?(String)
102
+ @user_agent = user_agent
103
+ end
149
104
 
150
- # Set COOKIES agent attribute
151
- #
152
- # @param cookies Request cookies
153
- #
154
- def cookies= cookies
155
- cookies = var_to_sym(cookies)
156
- cookies = cookies.to_a.map { |x| "#{x[0]}=#{x[1]}" }.join('&') if cookies.is_a?(Hash)
157
- fail 'attribute cookies must be String' unless cookies.is_a?(String)
158
- @cookies = cookies
159
- end
105
+ # Initialize Net::HTTP connection through proxy provider
106
+ # The :type key selects a SOCKS proxy (:socks); any other value uses an HTTP(S) proxy
107
+ #
108
+ # @param proxy Proxy configuration
109
+ #
110
+ def proxy=(proxy)
111
+ if proxy.is_a?(String)
112
+ ip, port, type = proxy.split(':')
113
+ raise 'attribute proxy must be in format ip:port' unless ip && port
114
+ type ||= :http
115
+ proxy = { ip: ip, port: port, type: type }
116
+ end
117
+ proxy = var_to_sym(proxy)
118
+ raise 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
119
+ raise 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] && proxy[:port]
120
+
121
+ @proxy = proxy
122
+ @http = if [:socks, 'socks'].include? proxy[:type]
123
+ Net::HTTP.SOCKSProxy(proxy[:ip].to_s, proxy[:port].to_s)
124
+ else
125
+ Net::HTTP::Proxy(proxy[:ip], proxy[:port])
126
+ end
127
+ end
160
128
 
129
+ # Set BASIC_AUTH agent attribute
130
+ #
131
+ # @param basic_auth Authentication configuration
132
+ #
133
+ def basic_auth=(basic_auth)
134
+ basic_auth = var_to_sym(basic_auth)
135
+ raise 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
136
+ raise 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] && basic_auth[:password]
137
+ @basic_auth = basic_auth
138
+ end
161
139
 
162
- # Set verify_mode
163
- #
164
- # @param verify_mode SSL verify_mode
165
- #
166
- def verify_mode= verify_mode
167
- @verify_mode = verify_mode
168
- end
140
+ # Set HEADERS agent attribute
141
+ #
142
+ # @param headers Request headers
143
+ #
144
+ def headers=(headers)
145
+ raise 'attribute headers must be Hash' unless headers.is_a?(Hash)
146
+ @headers = headers
147
+ end
169
148
 
149
+ # Set COOKIES agent attribute
150
+ #
151
+ # @param cookies Request cookies
152
+ #
153
+ def cookies=(cookies)
154
+ cookies = var_to_sym(cookies)
155
+ cookies = cookies.to_a.map { |x| "#{x[0]}=#{x[1]}" }.join('&') if cookies.is_a?(Hash)
156
+ raise 'attribute cookies must be String' unless cookies.is_a?(String)
157
+ @cookies = cookies
158
+ end
170
159
 
171
- # Fetch request for GET and POST HTTP methods
172
- # Setting USER_AGENT, BASIC_AUTH, HEADERS, COOKIES request attribute
173
- # Make response and save COOKIES for next requests
174
- #
175
- # @param url Resource link
176
- # @param method Request method
177
- # @param headers Request header
178
- # @param params Request additional params
179
- #
180
- def fetch url, method = :get, headers = {}, params = {}
181
- if @debug.active
182
- @debug.save '=============================='
183
- @debug.save "#{method.upcase} #{url}"
184
- @debug.save "-> [proxy] = #{@proxy}" if @proxy
185
- @debug.save "-> [params] = #{params}"
186
- @debug.save '------------------------------'
160
+ # Set verify_mode
161
+ #
162
+ # @param verify_mode SSL verify_mode
163
+ #
164
+ # def verify_mode=(verify_mode)
165
+ # @verify_mode = verify_mode
166
+ # end
167
+
168
+ # Init follow location for redirect
169
+ #
170
+ # @param follow_location Follow location flag
171
+ #
172
+ def follow_location=(follow_location)
173
+ raise 'attribute follow_location must be Boolean' unless follow_location.is_a?(TrueClass) || follow_location.is_a?(FalseClass)
174
+ @follow_location = follow_location
187
175
  end
188
- set_uri url
189
- case method
176
+
177
+ # Fetch request for GET and POST HTTP methods
178
+ # Setting USER_AGENT, BASIC_AUTH, HEADERS, COOKIES request attribute
179
+ # Make response and save COOKIES for next requests
180
+ #
181
+ # @param url Resource link
182
+ # @param method Request method
183
+ # @param headers Request header
184
+ # @param params Request additional params
185
+ #
186
+ def fetch(url, method = :get, headers = {}, params = {})
187
+ if @debug.active
188
+ @debug.save '=============================='
189
+ @debug.save "#{method.upcase} #{url}"
190
+ @debug.save "-> [proxy] = #{@proxy}" if @proxy
191
+ @debug.save "-> [params] = #{params}"
192
+ @debug.save '------------------------------'
193
+ end
194
+ convert_to_uri url
195
+ case method
190
196
  when :get
191
197
  @request = Net::HTTP::Get.new(@uri.request_uri)
192
198
  when :post
193
199
  @request = Net::HTTP::Post.new(@uri.request_uri)
194
200
  @request.set_form_data(params)
195
- end
196
- set_user_agent if @user_agent
197
- set_basic_auth unless @basic_auth.empty?
198
- @headers = headers unless headers.empty?
199
- set_headers unless @headers.empty?
200
- set_cookies if @cookies
201
- @response = send_request
202
- case @response
201
+ end
202
+ set_user_agent if @user_agent
203
+ set_basic_auth unless @basic_auth.empty?
204
+ @headers = headers unless headers.empty?
205
+ set_headers unless @headers.empty?
206
+ set_cookies if @cookies
207
+ @response = send_request
208
+ case @response
203
209
  # HTTP response code 1xx
204
210
  when Net::HTTPInformation
205
- @debug.save "<- [response] = Net::HTTPInformation" if @debug.active
211
+ @debug.save '<- [response] = Net::HTTPInformation' if @debug.active
206
212
  # HTTP response code 2xx
207
213
  when Net::HTTPSuccess
208
214
  save_headers if @response.header
209
215
  save_cookies if @response.cookies
210
216
  @debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
217
+ # Follow meta refresh
218
+ if @follow_location
219
+ refresh = @response.ng.at_css('meta[http-equiv="refresh"]')
220
+ @response = fetch refresh.attr('content').gsub(/\A.*?(http)/, 'http') if refresh
221
+ end
211
222
  # HTTP response code 3xx
212
223
  when Net::HTTPRedirection
213
224
  @debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
225
+ @debug.save 'try curl user_agent: tg.user_agent=\'curl\'' if @debug.active
226
+ # Follow location
227
+ @response = fetch @response.header['Location'] if @follow_location
214
228
  # HTTP response code 4xx
215
229
  when Net::HTTPClientError
216
230
  @debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
217
231
  # HTTP response code 5xx
218
232
  when Net::HTTPServerError
219
233
  @debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
234
+ end
235
+ @debug.save_to_file @response.body if @debug.save_html
236
+ @response
220
237
  end
221
- @debug.save_to_file @response.body if @debug.save_html
222
- @response
223
- end
224
-
225
-
226
- # Initialize URI object from request url
227
- #
228
- # @param url Request link
229
- #
230
- def set_uri url
231
- # It's magic work with escaped url
232
- @uri = URI(URI.escape(URI.unescape(url)))
233
- @debug.save "-> [uri] = #{@uri}" if @debug.active
234
- end
235
-
236
-
237
- # Set USER_AGENT request attribute
238
- #
239
- def set_user_agent
240
- @headers['User-Agent'] = @user_agent
241
- @debug.save "-> [user_agent] = #{@user_agent}" if @debug.active
242
- end
243
238
 
239
+ # Initialize URI object from request url
240
+ #
241
+ # @param url Request link
242
+ #
243
+ def convert_to_uri(url)
244
+ # Unescape first, then escape again, so an already-escaped url is not escaped twice
245
+ @uri = URI(URI.escape(URI.unescape(url)))
246
+ @debug.save "-> [uri] = #{@uri}" if @debug.active
247
+ end
244
248
 
245
- # Set BASIC_AUTH request authentification
246
- #
247
- def set_basic_auth
248
- @request.basic_auth @basic_auth[:username], @basic_auth[:password]
249
- @debug.save "-> [basic_auth] = #{@basic_auth}" if @debug.active
250
- end
249
+ # Set USER_AGENT request attribute
250
+ #
251
+ def set_user_agent
252
+ @headers['User-Agent'] = @user_agent
253
+ @debug.save "-> [user_agent] = #{@user_agent}" if @debug.active
254
+ end
251
255
 
256
+ # Set BASIC_AUTH request authentication
257
+ #
258
+ def set_basic_auth
259
+ @request.basic_auth @basic_auth[:username], @basic_auth[:password]
260
+ @debug.save "-> [basic_auth] = #{@basic_auth}" if @debug.active
261
+ end
252
262
 
253
- # Set request HEADERS
254
- #
255
- def set_headers
256
- @headers.each do |k, v|
257
- k = String(k)
258
- case k
263
+ # Set request HEADERS
264
+ #
265
+ def set_headers
266
+ @headers.each do |k, v|
267
+ k = String(k)
268
+ case k
259
269
  when 'Accept'
260
270
  @request[k] = v
261
271
  else
262
272
  @request.add_field(k, v)
273
+ end
263
274
  end
275
+ @debug.save "-> [headers] = #{@headers}" if @debug.active
264
276
  end
265
- @debug.save "-> [headers] = #{@headers}" if @debug.active
266
- end
267
-
268
277
 
269
- # Set request COOKIES
270
- #
271
- def set_cookies
272
- @request['Cookie'] = @cookies
273
- @debug.save "-> [cookies] = #{@cookies}" if @debug.active
274
- end
275
-
276
-
277
- # Send request and get response
278
- # Use SSL connect for HTTPS link scheme
279
- #
280
- def send_request
281
- @http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https', verify_mode: @verify_mode, read_timeout: @read_timeout) do |http|
282
- @debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
283
- http.request(@request)
278
+ # Set request COOKIES
279
+ #
280
+ def set_cookies
281
+ @request['Cookie'] = @cookies
282
+ @debug.save "-> [cookies] = #{@cookies}" if @debug.active
284
283
  end
285
- end
286
-
287
-
288
- # Save response headers in agent attribute
289
- #
290
- def save_headers
291
- @headers = @response.headers
292
- # Delete header TRANSFER_ENCODING for chain of requests
293
- @headers.delete('transfer-encoding')
294
- @debug.save "<- [headers] = #{@headers}" if @debug.active
295
- end
296
284
 
285
+ # Send request and get response
286
+ # Use SSL connect for HTTPS link scheme
287
+ #
288
+ def send_request
289
+ @http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https', verify_mode: @verify_mode, read_timeout: @read_timeout) do |http|
290
+ @debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
291
+ http.request(@request)
292
+ end
293
+ end
297
294
 
298
- # Save response cookies in agent attribute
299
- #
300
- def save_cookies
301
- @cookies = @response.cookies
302
- @debug.save "<- [cookies] = #{@cookies}" if @debug.active
303
- end
295
+ # Save response headers in agent attribute
296
+ #
297
+ def save_headers
298
+ @headers = @response.headers
299
+ # Delete header TRANSFER_ENCODING for chain of requests
300
+ @headers.delete('transfer-encoding')
301
+ @debug.save "<- [headers] = #{@headers}" if @debug.active
302
+ end
304
303
 
304
+ # Save response cookies in agent attribute
305
+ #
306
+ def save_cookies
307
+ @cookies = @response.cookies
308
+ @debug.save "<- [cookies] = #{@cookies}" if @debug.active
309
+ end
305
310
 
306
- # Clears headers and cookies
307
- #
308
- def reset
309
- @headers = {}
310
- @cookies = nil
311
- end
311
+ # Clears headers and cookies
312
+ #
313
+ def reset
314
+ @headers = {}
315
+ @cookies = nil
316
+ end
312
317
 
313
- # Convert variables and contains to symbol
314
- #
315
- # @param var Variable need to convert
316
- #
317
- def var_to_sym var, str_to_sym = false
318
- if var.is_a?(Hash)
319
- result = {}
320
- var.each do |k, v|
321
- result[k.to_sym] = var_to_sym(v, str_to_sym)
322
- end
323
- elsif var.is_a?(Array)
324
- result = []
325
- var.each do |v|
326
- result << var_to_sym(v, str_to_sym)
318
+ # Recursively convert Hash keys (and, when str_to_sym is true, String values) to symbols
319
+ #
320
+ # @param var Variable to convert
321
+ #
322
+ def var_to_sym(var, str_to_sym = false)
323
+ if var.is_a?(Hash)
324
+ result = {}
325
+ var.each do |k, v|
326
+ result[k.to_sym] = var_to_sym(v, str_to_sym)
327
+ end
328
+ elsif var.is_a?(Array)
329
+ result = []
330
+ var.each do |v|
331
+ result << var_to_sym(v, str_to_sym)
332
+ end
333
+ elsif var.is_a?(String)
334
+ result = str_to_sym ? var.to_sym : var
335
+ else
336
+ result = var
327
337
  end
328
- elsif var.is_a?(String)
329
- result = str_to_sym ? var.to_sym : var
330
- else
331
- result = var
338
+ result
332
339
  end
333
- result
334
340
  end
335
- end
341
+ end
@@ -15,76 +15,30 @@ class Debug
15
15
  @save_html = false
16
16
  end
17
17
 
18
-
19
- # Set debug active flag
20
- #
21
- # @param active Flag
22
- #
23
- def active= active
24
- @active = active
25
- end
26
-
27
-
28
- # Get debug active flag
29
- def active
30
- @active
31
- end
32
-
33
-
34
- # Set debug destination
35
- #
36
- # @param destination Save log to file or print
37
- #
38
- def destination= destination
39
- @destination = destination
40
- end
41
-
42
-
43
- # Get debug destination
44
- #
45
- def destination
46
- @destination
47
- end
48
-
49
-
50
- # Set debug flag to save response HTML to file
51
- #
52
- # @param save_html Flag
53
- def save_html= save_html
54
- @save_html = save_html
55
- end
56
-
57
-
58
- # Get debug flag to save response HTML to file
59
- #
60
- def save_html
61
- @save_html
62
- end
63
-
64
-
65
18
  # Save log information
66
19
  #
67
20
  # @param message Message body
68
21
  #
69
- def save message
22
+ def save(message)
70
23
  message = "TG | #{Time.now.strftime('%Y%m%d-%H%M%S')} | #{message}"
71
24
  case @destination
72
- when :file
73
- save_to_file message
74
- when :print
75
- p message
25
+ when :file
26
+ save_to_file message
27
+ when :print
28
+ p message
76
29
  end
77
30
  end
78
31
 
79
-
80
32
  # Save log information to file
81
33
  #
82
34
  # @param message Message body
83
35
  #
84
- def save_to_file message
36
+ def save_to_file(message)
37
+ # Mark the message as UTF-8 (bytes are not transcoded) before appending it to the log file
38
+ message = message.force_encoding('utf-8')
85
39
  debug_path = "#{Dir.pwd}/log"
86
- Dir.mkdir(debug_path, 0775) unless File.exists? debug_path
40
+ Dir.mkdir(debug_path, 0o775) unless File.exist? debug_path
87
41
  filename = "#{Time.now.strftime('%Y%m%d')}.log"
88
42
  File.open("#{debug_path}/#{filename}", 'a+') { |f| f << "#{message}\r\n" }
89
43
  end
90
- end
44
+ end
@@ -4,29 +4,27 @@ require 'nokogiri'
4
4
  module Net
5
5
  # Success response class
6
6
  class HTTPOK
7
-
8
7
  # Nokogiri object of response
9
8
  #
10
9
  def ng
11
- Nokogiri::HTML(self.body)
10
+ Nokogiri::HTML(body)
12
11
  end
13
12
 
14
13
  # Response Cookies
15
14
  #
16
15
  def cookies
17
- cookies = self.get_fields('set-cookie')
16
+ cookies = get_fields('set-cookie')
18
17
  if cookies
19
18
  cookies.map { |cookie| cookie.gsub(/\A([^;]+).*\Z/, '\1') }.join('&')
20
- else
21
- nil
22
19
  end
23
20
  end
24
21
 
25
-
26
22
  # Response Headers
27
23
  #
28
24
  def headers
29
- self.header.to_hash.inject({}) { |headers, (header_key, header_value)| headers[header_key] = header_value.first; headers }
25
+ header.to_hash.each_with_object({}) do |(header_key, header_value), result|
26
+ result[header_key] = header_value.first
27
+ end
30
28
  end
31
29
  end
32
- end
30
+ end
@@ -1,4 +1,4 @@
1
1
  class TinyGrabber
2
2
  # Version number
3
- VERSION = '0.2.8'
3
+ VERSION = '0.3.0'.freeze
4
4
  end
data/lib/tiny_grabber.rb CHANGED
@@ -10,17 +10,15 @@ require 'tiny_grabber/http'
10
10
  # Main class for TinyGrabber
11
11
  #
12
12
  class TinyGrabber
13
-
14
13
  # Initialize a new TinyGrabber user agent.
15
14
  #
16
15
  def initialize
17
16
  @agent = TinyGrabber::Agent.new
18
17
  end
19
18
 
20
-
21
19
  # Singleton > Initialize a new TinyGrabber user agent.
22
20
  #
23
- def self.initialize config = {}
21
+ def self.initialize(config = {})
24
22
  @agent = TinyGrabber::Agent.new
25
23
 
26
24
  @agent.debug = config[:debug] if config[:debug]
@@ -32,13 +30,12 @@ class TinyGrabber
32
30
  @agent.cookies = config[:cookies] if config[:cookies]
33
31
  end
34
32
 
35
-
36
33
  # HTTP::GET request
37
34
  #
38
35
  # @param url Resource link
39
36
  # @param headers Request header
40
37
  #
41
- def get url, headers = {}
38
+ def get(url, headers = {})
42
39
  @agent.fetch url, :get, headers
43
40
  end
44
41
 
@@ -47,129 +44,115 @@ class TinyGrabber
47
44
  # @param url Resource link
48
45
  # @param config Agent configuration
49
46
  #
50
- def self.get url, config = {}
51
- initialize config
47
+ def self.get(url, config = {})
48
+ initialize config
52
49
  @agent.fetch url, :get
53
50
  end
54
51
 
55
-
56
52
  # HTTP::POST request
57
53
  #
58
54
  # @param url Resource link
59
55
  # @param params Request post data
60
56
  # @param headers Request header
61
57
  #
62
- def post url, params = {}, headers = {}
58
+ def post(url, params = {}, headers = {})
63
59
  @agent.fetch url, :post, headers, params
64
60
  end
65
61
 
66
-
67
62
  # Singleton > HTTP::POST request
68
63
  #
69
64
  # @param url Resource link
70
65
  # @param config Agent configuration
71
66
  #
72
- def self.post url, params = {}, config = {}
73
- initialize config
67
+ def self.post(url, params = {}, config = {})
68
+ initialize config
74
69
  @agent.fetch url, :post, {}, params
75
70
  end
76
71
 
77
-
78
72
  # Set DEBUG flag
79
73
  #
80
74
  # @param debug Flag to start debug
81
75
  #
82
- def debug= debug
76
+ def debug=(debug)
83
77
  @agent.debug = debug
84
78
  end
85
79
 
86
-
87
80
  # Read READ_TIMEOUT agent attribute
88
81
  #
89
82
  def read_timeout
90
83
  @agent.read_timeout
91
84
  end
92
85
 
93
-
94
86
  # Set READ_TIMEOUT agent attribute
95
87
  #
96
88
  # @param read_timeout Waiting time to reading
97
89
  #
98
- def read_timeout= read_timeout
90
+ def read_timeout=(read_timeout)
99
91
  @agent.read_timeout = read_timeout
100
92
  end
101
93
 
102
-
103
94
  # Read USER_AGENT agent attribute
104
95
  #
105
96
  def user_agent
106
97
  @agent.user_agent
107
98
  end
108
99
 
109
-
110
100
  # Set USER_AGENT agent attribute
111
101
  #
112
102
  # @param user_agent Web browser name
113
103
  #
114
- def user_agent= user_agent
104
+ def user_agent=(user_agent)
115
105
  @agent.user_agent = user_agent
116
106
  end
117
107
 
118
-
119
108
  # Read PROXY agent attribute
120
109
  #
121
110
  def proxy
122
111
  @agent.proxy
123
112
  end
124
113
 
125
-
126
114
  # Set PROXY agent attribute
127
115
  #
128
116
  # @param proxy Proxy configuration
129
117
  #
130
- def proxy= proxy
118
+ def proxy=(proxy)
131
119
  @agent.proxy = proxy
132
120
  end
133
121
 
134
-
135
122
  # Set BASIC_AUTH agent attribute
136
123
  #
137
124
  # @param username Authentification username
138
125
  # @param password Authentification password
139
126
  #
140
- def basic_auth username, password
127
+ def basic_auth(username, password)
141
128
  @agent.basic_auth = { username: username, password: password }
142
129
  end
143
130
 
144
-
145
131
  # Read HEADERS agent attribute
146
132
  #
147
133
  def headers
148
134
  @agent.headers
149
135
  end
150
136
 
151
-
152
137
  # Set HEADERS agent attribute
153
138
  #
154
139
  # @param headers Request headers
155
140
  #
156
- def headers= headers
141
+ def headers=(headers)
157
142
  @agent.headers = headers
158
143
  end
159
144
 
160
-
161
145
  # Read COOKIES agent attribute
162
146
  #
163
147
  def cookies
164
148
  @agent.cookies
165
149
  end
166
150
 
167
-
168
151
  # Set COOKIES agent attribute
169
152
  #
170
153
  # @param cookies Request cookies
171
154
  #
172
- def cookies= cookies
155
+ def cookies=(cookies)
173
156
  @agent.cookies = cookies
174
157
  end
175
158
 
@@ -179,12 +162,19 @@ class TinyGrabber
179
162
  @agent.reset
180
163
  end
181
164
 
182
-
183
165
  # Set verify_mode
184
166
  #
185
167
  # @param verify_mode SSL verify mode
186
168
  #
187
- def verify_mode= verify_mode
169
+ def verify_mode=(verify_mode)
188
170
  @agent.verify_mode = verify_mode
189
171
  end
190
- end
172
+
173
+ # Set follow_location
174
+ #
175
+ # @param follow_location Follow location flag
176
+ #
177
+ def follow_location=(follow_location)
178
+ @agent.follow_location = follow_location
179
+ end
180
+ end
data/tiny_grabber.gemspec CHANGED
@@ -6,11 +6,11 @@ require 'tiny_grabber/version'
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = 'tiny_grabber'
8
8
  spec.version = TinyGrabber::VERSION
9
- spec.authors = ["Aleksandr Chernyshev"]
9
+ spec.authors = ['Aleksandr Chernyshev']
10
10
  spec.email = ['moroznoeytpo@gmail.com']
11
11
 
12
- spec.summary = %q{Tiny grabber}
13
- spec.description = %q{Simple gem for grabbing remote web page.}
12
+ spec.summary = 'Tiny grabber'
13
+ spec.description = 'Simple gem for grabbing remote web page.'
14
14
  spec.homepage = 'https://github.com/moroznoeytpo/tiny_grabber'
15
15
  spec.license = 'MIT'
16
16
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiny_grabber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandr Chernyshev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-04 00:00:00.000000000 Z
11
+ date: 2016-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: socksify