tiny_grabber 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/README.md +51 -33
- data/lib/tiny_grabber/agent.rb +245 -0
- data/lib/tiny_grabber/http.rb +13 -1
- data/lib/tiny_grabber/version.rb +1 -1
- data/lib/tiny_grabber.rb +129 -97
- data/tiny_grabber.gemspec +4 -3
- metadata +21 -7
- data/tiny_grabber-0.0.2.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99211b6e18440ea58661cf5b5a5bdadec631f942
|
4
|
+
data.tar.gz: 1e78bd2089d65949431b519ae7a15b3f4bba202f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a4dc8a114ea9818dee2bd400ccc5e45c60ae40f2a9e7ce5002c51b214b9e471d1066f2d865e48ab0acbe91e80c51d55cb17ed43a7eed29e01283f82c61c09b8
|
7
|
+
data.tar.gz: 4ec0f357261f447f3cd34dda16a9376dd770a73a9c7fc30b22577f42de1b4a33bd557b5c8da85ce54a5d40d780af4e152ebbd93b0d50097fbf0c7081d134f30e
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -27,44 +27,62 @@ Or install it yourself as:
|
|
27
27
|
|
28
28
|
require 'tiny_grabber'
|
29
29
|
|
30
|
-
|
31
|
-
|
30
|
+
read_timeout = 300
|
31
|
+
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36'
|
32
|
+
proxy = { ip: 'xx.xx.xx.xx', port: 'xxxx' }
|
33
|
+
headers1 = { 'Content-Type' => 'text/html; charset=utf-8' }
|
34
|
+
headers2 = { 'Content-Type' => 'text/html; charset=utf-8', 'Connection' => 'keep-alive' }
|
35
|
+
cookies = 'username=username&password=password'
|
36
|
+
params = { key: 'value' }
|
37
|
+
|
38
|
+
# Initialize TinyGrabber object
|
39
|
+
tg = TinyGrabber.new
|
40
|
+
# Set debug flag to view log information
|
41
|
+
tg.debug = true
|
42
|
+
# Set max time to execute request
|
43
|
+
tg.read_timeout = read_timeout
|
44
|
+
# Set web browser name
|
45
|
+
tg.user_agent = user_agent
|
46
|
+
# Set proxy configuration
|
47
|
+
tg.proxy = proxy
|
48
|
+
# Set basic authentication
|
49
|
+
tg.basic_auth('username', 'password')
|
50
|
+
# Set HTTP headers
|
51
|
+
tg.headers = headers1
|
52
|
+
# Set HTTP cookies
|
53
|
+
tg.cookies = cookies
|
54
|
+
|
55
|
+
# Make response with GET method
|
56
|
+
response = tg.get 'https://whoer.net/ru', headers
|
57
|
+
# Make response with POST method
|
58
|
+
response = tg.post 'https://whoer.net/ru', params, headers
|
59
|
+
|
60
|
+
# Make singleton response with GET method
|
61
|
+
response = TinyGrabber.get 'https://whoer.net/ru', headers, { debug: true, read_timeout: read_timeout } # ...and other agent options
|
62
|
+
# Make singleton response with POST method
|
63
|
+
response = TinyGrabber.post 'https://whoer.net/ru', params, headers, { debug: true, read_timeout: read_timeout } # ...and other agent options
|
64
|
+
|
65
|
+
# Get Nokogiri object from response HTML
|
66
|
+
ng = response.ng
|
67
|
+
# Get HTTP response code
|
68
|
+
response.code
|
69
|
+
# Get response cookies
|
70
|
+
response.cookies
|
71
|
+
# Get response headers
|
72
|
+
response.headers
|
32
73
|
|
33
|
-
# Set headers
|
34
|
-
headers = { 'Content-Type' => 'application/json' }
|
35
|
-
|
36
|
-
# Set http(s)/socks4(5) proxy
|
37
|
-
# Proxy type by default is http. You can change it to socks, with setting params proxy_type equal socks
|
38
|
-
proxy = { ip: 'xx.xx.xx.xx', port: xx, proxy_type: :socks }
|
39
|
-
|
40
|
-
# Set Basic Authentication
|
41
|
-
auth = { username: '', password: '' }
|
42
|
-
|
43
|
-
# Set POST data
|
44
|
-
# Request HTTP type by default is GET. You can send a POST request by setting post params. You can also send an empty POST request.
|
45
|
-
post = { some_data: '' }
|
46
|
-
|
47
|
-
# HTTP GET request
|
48
|
-
response = TinyGrabber.get url, headers: headers, proxy: proxy, auth: auth
|
49
|
-
|
50
|
-
# HTTP GET request
|
51
|
-
response = TinyGrabber.post url, post, headers: headers, proxy: proxy, auth: auth
|
52
|
-
|
53
|
-
# HTTP answer code
|
54
|
-
p response.code
|
55
|
-
|
56
|
-
# HTTP content
|
57
|
-
p response.body
|
58
|
-
|
59
|
-
# Nokogiri object
|
60
|
-
p response.ng
|
61
|
-
|
62
|
-
# Response cookies
|
63
|
-
p response.cookies
|
64
74
|
```
|
65
75
|
|
66
76
|
## Changelog
|
67
77
|
|
78
|
+
* *v 0.2.0*
|
79
|
+
* It is now possible to create a TinyGrabber object
|
80
|
+
* Change order of parameters for singleton request
|
81
|
+
* Add response cookies and headers
|
82
|
+
* Add debug flag for detailed logging and saving the result HTML to a /log/*.html file
|
83
|
+
|
84
|
+
* *v 0.1.1*
|
85
|
+
* Save cookie in Redis
|
68
86
|
* *v 0.1.0*
|
69
87
|
* Add TinyGrabber.post method for HTTP POST request
|
70
88
|
* *v 0.0.7*
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# Net::HTTP agent for TinyGrabber
|
2
|
+
# Initialize connect with Resource
|
3
|
+
# Setting connect attributes
|
4
|
+
#
|
5
|
+
class TinyGrabber::Agent
|
6
|
+
# Debug flag for detailed logging and saving the result HTML to a /log/*.html file
|
7
|
+
attr_accessor :debug
|
8
|
+
# Max time to execute request
|
9
|
+
attr_accessor :read_timeout
|
10
|
+
# Web browser name
|
11
|
+
attr_accessor :user_agent
|
12
|
+
# Remote proxy configuration
|
13
|
+
attr_accessor :proxy
|
14
|
+
# Basic authentication configuration
|
15
|
+
attr_accessor :basic_auth
|
16
|
+
# Headers
|
17
|
+
attr_accessor :headers
|
18
|
+
# Cookies
|
19
|
+
attr_accessor :cookies
|
20
|
+
|
21
|
+
# Initialization object
|
22
|
+
#
|
23
|
+
def initialize
|
24
|
+
@debug = false
|
25
|
+
|
26
|
+
# Initialize variables agent attributes
|
27
|
+
@user_agent = nil
|
28
|
+
@proxy = []
|
29
|
+
@basic_auth = {}
|
30
|
+
@headers = {}
|
31
|
+
@cookies = nil
|
32
|
+
@read_timeout = 10
|
33
|
+
# Initialize variable for URI object
|
34
|
+
@uri = nil
|
35
|
+
# Initialize variable for Net::HTTP request object
|
36
|
+
@http = Net::HTTP
|
37
|
+
# Initialize variable for Net::HTTP response object
|
38
|
+
@response = nil
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
# Set READ_TIMEOUT agent attribute
|
43
|
+
#
|
44
|
+
# @param read_timeout Maximum time to wait while reading the response
|
45
|
+
#
|
46
|
+
def read_timeout= read_timeout
|
47
|
+
fail 'attribute read_timeout must be Integer' unless read_timeout.is_a?(Integer)
|
48
|
+
@read_timeout = read_timeout
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
# Set USER_AGENT agent attribute
|
53
|
+
#
|
54
|
+
# @param user_agent Web browser name
|
55
|
+
#
|
56
|
+
def user_agent= user_agent
|
57
|
+
fail 'attribute user_agent must be String' unless user_agent.is_a?(String)
|
58
|
+
@user_agent = user_agent
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
# Initialize Net::HTTP connection through proxy provider
|
63
|
+
# The :type key of the proxy configuration selects the proxy kind: SOCKS4(5) or HTTP(S)
|
64
|
+
#
|
65
|
+
# @param proxy Proxy configuration
|
66
|
+
#
|
67
|
+
def proxy= proxy
|
68
|
+
if proxy.is_a?(String)
|
69
|
+
ip, port = proxy.split(':')
|
70
|
+
fail 'attribute proxy must be in format ip:port' unless ip and port
|
71
|
+
proxy = { ip: ip, port: port }
|
72
|
+
end
|
73
|
+
fail 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
|
74
|
+
fail 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] and proxy[:port]
|
75
|
+
|
76
|
+
@proxy = proxy
|
77
|
+
if ['socks', :socks].include?(proxy[:type])
|
78
|
+
@http = Net::HTTP.SOCKSProxy(proxy[:ip], proxy[:port])
|
79
|
+
else
|
80
|
+
@http = Net::HTTP::Proxy(proxy[:ip], proxy[:port])
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
|
85
|
+
# Set BASIC_AUTH agent attribute
|
86
|
+
#
|
87
|
+
# @param basic_auth Authentication configuration
|
88
|
+
#
|
89
|
+
def basic_auth= basic_auth
|
90
|
+
fail 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
|
91
|
+
fail 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] and basic_auth[:password]
|
92
|
+
@basic_auth = basic_auth
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
# Set HEADERS agent attribute
|
97
|
+
#
|
98
|
+
# @param headers Request headers
|
99
|
+
#
|
100
|
+
def headers= headers
|
101
|
+
fail 'attribute headers must be Hash' unless headers.is_a?(Hash)
|
102
|
+
@headers = headers
|
103
|
+
end
|
104
|
+
|
105
|
+
|
106
|
+
# Set COOKIES agent attribute
|
107
|
+
#
|
108
|
+
# @param cookies Request cookies
|
109
|
+
#
|
110
|
+
def cookies= cookies
|
111
|
+
fail 'attribute cookies must be String' unless cookies.is_a?(String)
|
112
|
+
@cookies = cookies
|
113
|
+
end
|
114
|
+
|
115
|
+
|
116
|
+
# Fetch request for GET and POST HTTP methods
|
117
|
+
# Setting USER_AGENT, BASIC_AUTH, HEADERS, COOKIES request attribute
|
118
|
+
# Make response and save COOKIES for next requests
|
119
|
+
#
|
120
|
+
# @param url Resource link
|
121
|
+
# @param method Request method
|
122
|
+
# @param headers Request header
|
123
|
+
# @param params Request additional params
|
124
|
+
#
|
125
|
+
def fetch url, method = :get, headers = {}, params = {}
|
126
|
+
if @debug
|
127
|
+
p "#{debug_initial_word} =============================="
|
128
|
+
p "#{debug_initial_word} #{method.upcase} #{url}"
|
129
|
+
p "#{debug_initial_word} #{params}"
|
130
|
+
p "#{debug_initial_word} ------------------------------"
|
131
|
+
end
|
132
|
+
set_uri url
|
133
|
+
case method
|
134
|
+
when :get
|
135
|
+
@request = Net::HTTP::Get.new(@uri.request_uri)
|
136
|
+
when :post
|
137
|
+
@request = Net::HTTP::Post.new(@uri.request_uri)
|
138
|
+
@request.set_form_data(params)
|
139
|
+
end
|
140
|
+
set_user_agent if @user_agent
|
141
|
+
set_basic_auth unless @basic_auth.empty?
|
142
|
+
@headers = headers unless headers.empty?
|
143
|
+
set_headers unless @headers.empty?
|
144
|
+
set_cookies if @cookies
|
145
|
+
@response = send_request
|
146
|
+
case @response
|
147
|
+
# HTTP response code 1xx
|
148
|
+
when Net::HTTPInformation
|
149
|
+
# HTTP response code 2xx
|
150
|
+
when Net::HTTPSuccess
|
151
|
+
save_headers if @response.header
|
152
|
+
save_cookies if @response.cookies
|
153
|
+
# HTTP response code 3xx
|
154
|
+
when Net::HTTPRedirection
|
155
|
+
# HTTP response code 4xx
|
156
|
+
when Net::HTTPClientError
|
157
|
+
# HTTP response code 5xx
|
158
|
+
when Net::HTTPServerError
|
159
|
+
end
|
160
|
+
if @debug
|
161
|
+
debug_filename = "log/#{method.upcase}_#{@uri.to_s.gsub(/[\/:]/, '_').gsub(/_+/, '_')}"
|
162
|
+
File.open(debug_filename, 'wb') { |f| f << @response.body } if @debug
|
163
|
+
p "#{debug_initial_word} HTML > #{debug_filename}"
|
164
|
+
end
|
165
|
+
@response
|
166
|
+
end
|
167
|
+
|
168
|
+
|
169
|
+
# Initialize URI object from request url
|
170
|
+
#
|
171
|
+
# @param url Request link
|
172
|
+
#
|
173
|
+
def set_uri url
|
174
|
+
# Unescape and then re-escape the url so that both raw and already-escaped urls are handled
|
175
|
+
@uri = URI(URI.escape(URI.unescape(url)))
|
176
|
+
p "#{debug_initial_word} URI = #{@uri}" if @debug
|
177
|
+
end
|
178
|
+
|
179
|
+
|
180
|
+
# Set USER_AGENT request attribute
|
181
|
+
#
|
182
|
+
def set_user_agent
|
183
|
+
@headers['User-Agent'] = @user_agent
|
184
|
+
p "#{debug_initial_word} user_agent = #{@user_agent}" if @debug
|
185
|
+
end
|
186
|
+
|
187
|
+
|
188
|
+
# Set BASIC_AUTH request authentication
|
189
|
+
#
|
190
|
+
def set_basic_auth
|
191
|
+
@request.basic_auth @basic_auth[:username], @basic_auth[:password]
|
192
|
+
p "#{debug_initial_word} basic_auth = #{@basic_auth}" if @debug
|
193
|
+
end
|
194
|
+
|
195
|
+
|
196
|
+
# Set request HEADERS
|
197
|
+
#
|
198
|
+
def set_headers
|
199
|
+
@headers.each { |k, v| @request.add_field(String(k), v) }
|
200
|
+
p "#{debug_initial_word} headers = #{@headers}" if @debug
|
201
|
+
end
|
202
|
+
|
203
|
+
|
204
|
+
# Set request COOKIES
|
205
|
+
#
|
206
|
+
def set_cookies
|
207
|
+
@request['Cookie'] = @cookies
|
208
|
+
p "#{debug_initial_word} cookies = #{@cookies}" if @debug
|
209
|
+
end
|
210
|
+
|
211
|
+
|
212
|
+
# Send request and get response
|
213
|
+
# Use SSL connect for HTTPS link scheme
|
214
|
+
#
|
215
|
+
def send_request
|
216
|
+
@http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https') do |http|
|
217
|
+
http.read_timeout = @read_timeout
|
218
|
+
p "#{debug_initial_word} read_timeout = #{@read_timeout}" if @debug
|
219
|
+
http.request(@request)
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
|
224
|
+
# Save response headers in agent attribute
|
225
|
+
#
|
226
|
+
def save_headers
|
227
|
+
@headers = @response.headers
|
228
|
+
p "#{debug_initial_word} save_headers = #{@headers}" if @debug
|
229
|
+
end
|
230
|
+
|
231
|
+
|
232
|
+
# Save response cookies in agent attribute
|
233
|
+
#
|
234
|
+
def save_cookies
|
235
|
+
@cookies = @response.cookies
|
236
|
+
p "#{debug_initial_word} save_cookies = #{@cookies}" if @debug
|
237
|
+
end
|
238
|
+
|
239
|
+
|
240
|
+
# Tiny grabber initial word for debug
|
241
|
+
#
|
242
|
+
def debug_initial_word
|
243
|
+
"TG | #{Time.now.strftime('%Y%m%d-%H%M%S')} |"
|
244
|
+
end
|
245
|
+
end
|
data/lib/tiny_grabber/http.rb
CHANGED
@@ -14,7 +14,19 @@ module Net
|
|
14
14
|
# Response Cookies
|
15
15
|
#
|
16
16
|
def cookies
|
17
|
-
self.get_fields('set-cookie')
|
17
|
+
cookies = self.get_fields('set-cookie')
|
18
|
+
if cookies
|
19
|
+
cookies.map { |cookie| cookie.gsub(/\A([^;]+).*\Z/, '\1') }.join('&')
|
20
|
+
else
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
# Response Headers
|
27
|
+
#
|
28
|
+
def headers
|
29
|
+
self.header.to_hash.inject({}) { |headers, (header_key, header_value)| headers[header_key] = header_value.first; headers }
|
18
30
|
end
|
19
31
|
end
|
20
32
|
end
|
data/lib/tiny_grabber/version.rb
CHANGED
data/lib/tiny_grabber.rb
CHANGED
@@ -1,147 +1,179 @@
|
|
1
|
-
require
|
1
|
+
require 'tiny_grabber/version'
|
2
|
+
require 'tiny_grabber/agent'
|
2
3
|
|
3
4
|
require 'uri'
|
4
5
|
require 'net/http'
|
5
6
|
require 'socksify/http'
|
6
7
|
require 'tiny_grabber/http'
|
8
|
+
require 'redis'
|
7
9
|
|
8
10
|
# Main class for TinyGrabber
|
11
|
+
#
|
9
12
|
class TinyGrabber
|
10
13
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
14
|
+
# Initialize a new TinyGrabber user agent.
|
15
|
+
#
|
16
|
+
def initialize
|
17
|
+
@agent = TinyGrabber::Agent.new
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
# Singleton > Initialize a new TinyGrabber user agent.
|
22
|
+
#
|
23
|
+
def self.initialize config = {}
|
24
|
+
@agent = TinyGrabber::Agent.new
|
25
|
+
|
26
|
+
@agent.debug = config[:debug] if config[:debug]
|
27
|
+
@agent.read_timeout = config[:read_timeout] if config[:read_timeout]
|
28
|
+
@agent.user_agent = config[:user_agent] if config[:user_agent]
|
29
|
+
@agent.proxy = config[:proxy] if config[:proxy]
|
30
|
+
@agent.basic_auth = config[:basic_auth] if config[:basic_auth]
|
31
|
+
@agent.headers = config[:headers] if config[:headers]
|
32
|
+
@agent.cookies = config[:cookies] if config[:cookies]
|
33
|
+
end
|
34
|
+
|
15
35
|
|
16
|
-
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
# * [proxy (hash)] Configuration of remote proxy server
|
21
|
-
# * *ip* address of remote proxy server
|
22
|
-
# * *port* of remote proxy server
|
23
|
-
# * *proxy_type* of remote proxy server
|
24
|
-
# * *http* for http(s) proxy servers (by default)
|
25
|
-
# * *socks* for socks4/5 proxy servers
|
26
|
-
# * [headers (hash)] Headers
|
27
|
-
# * [auth (hash)] Basic Authentication
|
28
|
-
# * *username* Authenticate username
|
29
|
-
# * *password* Authenticate password
|
36
|
+
# HTTP::GET request
|
37
|
+
#
|
38
|
+
# @param url Resource link
|
39
|
+
# @param headers Request header
|
30
40
|
#
|
31
|
-
|
32
|
-
|
41
|
+
def get url, headers = {}
|
42
|
+
@agent.fetch url, :get, headers
|
43
|
+
end
|
44
|
+
|
45
|
+
# Singleton > HTTP::GET request
|
33
46
|
#
|
34
|
-
# @param url
|
35
|
-
# @param
|
47
|
+
# @param url Resource link
|
48
|
+
# @param headers Request header
|
36
49
|
#
|
37
|
-
def self.get url,
|
38
|
-
|
39
|
-
@
|
40
|
-
@http = set_http_connect
|
41
|
-
@request = Net::HTTP::Get.new(@uri.request_uri)
|
42
|
-
set_basic_auth
|
43
|
-
set_headers
|
44
|
-
get_request
|
50
|
+
def self.get url, headers = {}, config = {}
|
51
|
+
initialize config
|
52
|
+
@agent.fetch url, :get, headers
|
45
53
|
end
|
46
54
|
|
47
55
|
|
48
|
-
|
49
|
-
# Get Net::HTTP object for content from remote single page
|
50
|
-
# [url (string)] Link to resource
|
51
|
-
# [data (hash)] Post data
|
52
|
-
# [params (hash)] Request params
|
53
|
-
# * [proxy (hash)] Configuration of remote proxy server
|
54
|
-
# * *ip* address of remote proxy server
|
55
|
-
# * *port* of remote proxy server
|
56
|
-
# * *proxy_type* of remote proxy server
|
57
|
-
# * *http* for http(s) proxy servers (by default)
|
58
|
-
# * *socks* for socks4/5 proxy servers
|
59
|
-
# * [headers (hash)] Headers
|
60
|
-
# * [auth (hash)] Basic Authentication
|
61
|
-
# * *username* Authenticate username
|
62
|
-
# * *password* Authenticate password
|
56
|
+
# HTTP::POST request
|
63
57
|
#
|
64
|
-
#
|
65
|
-
#
|
58
|
+
# @param url Resource link
|
59
|
+
# @param params Request post data
|
60
|
+
# @param headers Request header
|
66
61
|
#
|
67
|
-
|
68
|
-
|
62
|
+
def post url, params = {}, headers = {}, config = {}
|
63
|
+
@agent.fetch url, :post, headers, params
|
64
|
+
end
|
65
|
+
|
66
|
+
|
67
|
+
# Singleton > HTTP::POST request
|
68
|
+
#
|
69
|
+
# @param url Resource link
|
70
|
+
# @param headers Request header
|
69
71
|
#
|
70
|
-
def self.post url,
|
71
|
-
|
72
|
-
|
73
|
-
@params = convert_params_to_sym params
|
74
|
-
@http = set_http_connect
|
75
|
-
@request = Net::HTTP::Post.new(@uri.request_uri)
|
76
|
-
@request.set_form_data(data)
|
77
|
-
set_basic_auth
|
78
|
-
set_headers
|
79
|
-
get_request
|
72
|
+
def self.post url, params = {}, headers = {}, config = {}
|
73
|
+
initialize config
|
74
|
+
@agent.fetch url, :post, headers, params
|
80
75
|
end
|
81
76
|
|
82
|
-
private
|
83
77
|
|
84
|
-
#
|
78
|
+
# Set DEBUG flag
|
85
79
|
#
|
86
|
-
# @param
|
80
|
+
# @param debug Flag to start debug
|
87
81
|
#
|
88
|
-
def
|
89
|
-
|
82
|
+
def debug= debug
|
83
|
+
@agent.debug = debug
|
90
84
|
end
|
91
85
|
|
92
|
-
|
86
|
+
|
87
|
+
# Read READ_TIMEOUT agent attribute
|
93
88
|
#
|
94
|
-
|
89
|
+
def read_timeout
|
90
|
+
@agent.read_timeout
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
# Set READ_TIMEOUT agent attribute
|
95
|
+
#
|
96
|
+
# @param read_timeout Maximum time to wait while reading the response
|
95
97
|
#
|
96
|
-
def
|
97
|
-
|
98
|
+
def read_timeout= read_timeout
|
99
|
+
@agent.read_timeout = read_timeout
|
98
100
|
end
|
99
101
|
|
100
102
|
|
101
|
-
#
|
103
|
+
# Read USER_AGENT agent attribute
|
102
104
|
#
|
103
|
-
def
|
104
|
-
|
105
|
-
if ['socks', :socks].include?(@params[:proxy][:proxy_type])
|
106
|
-
Net::HTTP.SOCKSProxy(@params[:proxy][:ip], @params[:proxy][:port])
|
107
|
-
else
|
108
|
-
Net::HTTP::Proxy(@params[:proxy][:ip], @params[:proxy][:port])
|
109
|
-
end
|
110
|
-
else
|
111
|
-
Net::HTTP
|
112
|
-
end
|
105
|
+
def user_agent
|
106
|
+
@agent.user_agent
|
113
107
|
end
|
114
108
|
|
115
109
|
|
116
|
-
# Set
|
110
|
+
# Set USER_AGENT agent attribute
|
117
111
|
#
|
118
|
-
# @param
|
119
|
-
# @param params Request params
|
112
|
+
# @param user_agent Web browser name
|
120
113
|
#
|
121
|
-
def
|
122
|
-
|
123
|
-
@request.basic_auth @params[:auth][:username], @params[:auth][:password]
|
124
|
-
end
|
114
|
+
def user_agent= user_agent
|
115
|
+
@agent.user_agent = user_agent
|
125
116
|
end
|
126
117
|
|
127
118
|
|
128
|
-
#
|
119
|
+
# Read PROXY agent attribute
|
129
120
|
#
|
130
|
-
|
131
|
-
|
121
|
+
def proxy
|
122
|
+
@agent.proxy
|
123
|
+
end
|
124
|
+
|
125
|
+
|
126
|
+
# Set PROXY agent attribute
|
132
127
|
#
|
133
|
-
|
134
|
-
|
128
|
+
# @param proxy Proxy configuration
|
129
|
+
#
|
130
|
+
def proxy= proxy
|
131
|
+
@agent.proxy = proxy
|
132
|
+
end
|
133
|
+
|
134
|
+
|
135
|
+
# Set BASIC_AUTH agent attribute
|
136
|
+
#
|
137
|
+
# @param username Authentication username
|
138
|
+
# @param password Authentication password
|
139
|
+
#
|
140
|
+
def basic_auth username, password
|
141
|
+
@agent.basic_auth = { username: username, password: password }
|
142
|
+
end
|
143
|
+
|
144
|
+
|
145
|
+
# Read HEADERS agent attribute
|
146
|
+
#
|
147
|
+
def headers
|
148
|
+
@agent.headers
|
149
|
+
end
|
150
|
+
|
151
|
+
|
152
|
+
# Set HEADERS agent attribute
|
153
|
+
#
|
154
|
+
# @param headers Request headers
|
155
|
+
#
|
156
|
+
def headers= headers
|
157
|
+
@agent.headers = headers
|
135
158
|
end
|
136
159
|
|
137
160
|
|
138
|
-
#
|
161
|
+
# Read COOKIES agent attribute
|
139
162
|
#
|
140
|
-
|
141
|
-
|
163
|
+
def cookies
|
164
|
+
@agent.cookies
|
165
|
+
end
|
166
|
+
|
167
|
+
|
168
|
+
# Set COOKIES agent attribute
|
169
|
+
#
|
170
|
+
# @param cookies Request cookies
|
142
171
|
#
|
143
|
-
def
|
144
|
-
@
|
172
|
+
def cookies= cookies
|
173
|
+
@agent.cookies = cookies
|
145
174
|
end
|
146
175
|
|
176
|
+
|
177
|
+
|
178
|
+
|
147
179
|
end
|
data/tiny_grabber.gemspec
CHANGED
@@ -20,10 +20,11 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.required_ruby_version = '~> 2.3.0'
|
22
22
|
|
23
|
-
spec.
|
24
|
-
spec.
|
23
|
+
spec.add_runtime_dependency 'socksify', '~> 1.7'
|
24
|
+
spec.add_runtime_dependency 'nokogiri', '~> 1.6'
|
25
|
+
spec.add_runtime_dependency 'redis', '~> 3.3'
|
25
26
|
|
26
|
-
spec.add_development_dependency 'bundler', '~> 1.11'
|
27
27
|
spec.add_development_dependency 'rake', '~> 10.0'
|
28
|
+
spec.add_development_dependency 'bundler', '~> 1.11'
|
28
29
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
29
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny_grabber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksandr Chernyshev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-06-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: socksify
|
@@ -39,19 +39,19 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.6'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: redis
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
48
|
-
type: :
|
47
|
+
version: '3.3'
|
48
|
+
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '3.3'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: bundler
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.11'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.11'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rspec
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,9 +112,9 @@ files:
|
|
98
112
|
- bin/console
|
99
113
|
- bin/setup
|
100
114
|
- lib/tiny_grabber.rb
|
115
|
+
- lib/tiny_grabber/agent.rb
|
101
116
|
- lib/tiny_grabber/http.rb
|
102
117
|
- lib/tiny_grabber/version.rb
|
103
|
-
- tiny_grabber-0.0.2.gem
|
104
118
|
- tiny_grabber.gemspec
|
105
119
|
homepage: https://github.com/moroznoeytpo/tiny_grabber
|
106
120
|
licenses:
|
data/tiny_grabber-0.0.2.gem
DELETED
Binary file
|