tiny_grabber 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/README.md +51 -33
- data/lib/tiny_grabber/agent.rb +245 -0
- data/lib/tiny_grabber/http.rb +13 -1
- data/lib/tiny_grabber/version.rb +1 -1
- data/lib/tiny_grabber.rb +129 -97
- data/tiny_grabber.gemspec +4 -3
- metadata +21 -7
- data/tiny_grabber-0.0.2.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99211b6e18440ea58661cf5b5a5bdadec631f942
|
4
|
+
data.tar.gz: 1e78bd2089d65949431b519ae7a15b3f4bba202f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a4dc8a114ea9818dee2bd400ccc5e45c60ae40f2a9e7ce5002c51b214b9e471d1066f2d865e48ab0acbe91e80c51d55cb17ed43a7eed29e01283f82c61c09b8
|
7
|
+
data.tar.gz: 4ec0f357261f447f3cd34dda16a9376dd770a73a9c7fc30b22577f42de1b4a33bd557b5c8da85ce54a5d40d780af4e152ebbd93b0d50097fbf0c7081d134f30e
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -27,44 +27,62 @@ Or install it yourself as:
|
|
27
27
|
|
28
28
|
require 'tiny_grabber'
|
29
29
|
|
30
|
-
|
31
|
-
|
30
|
+
read_timeout = 300
|
31
|
+
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36'
|
32
|
+
proxy = { ip: 'xx.xx.xx.xx', port: 'xxxx' }
|
33
|
+
headers1 = { 'Content-Type' => 'text/html; charset=utf-8' }
|
34
|
+
headers2 = { 'Content-Type' => 'text/html; charset=utf-8', 'Connection' => 'keep-alive' }
|
35
|
+
cookies = 'username=username&password=password'
|
36
|
+
params = { key: 'value' }
|
37
|
+
|
38
|
+
# Initialize TinyGrabber object
|
39
|
+
tg = TinyGrabber.new
|
40
|
+
# Set debug flag to view log information
|
41
|
+
tg.debug = true
|
42
|
+
# Set max time to execute request
|
43
|
+
tg.read_timeout = read_timeout
|
44
|
+
# Set web browser name
|
45
|
+
tg.user_agent = user_agent
|
46
|
+
# Set proxy configuration
|
47
|
+
tg.proxy = proxy
|
48
|
+
# Set basic authentication
|
49
|
+
tg.basic_auth('username', 'password')
|
50
|
+
# Set HTTP headers
|
51
|
+
tg.headers = headers1
|
52
|
+
# Set HTTP cookies
|
53
|
+
tg.cookies = cookies
|
54
|
+
|
55
|
+
# Make request with GET method
|
56
|
+
response = tg.get 'https://whoer.net/ru', headers2
|
57
|
+
# Make request with POST method
|
58
|
+
response = tg.post 'https://whoer.net/ru', params, headers2
|
59
|
+
|
60
|
+
# Make singleton request with GET method
|
61
|
+
response = TinyGrabber.get 'https://whoer.net/ru', headers2, { debug: true, read_timeout: read_timeout } # config also accepts :user_agent, :proxy, :basic_auth, :headers, :cookies
|
62
|
+
# Make singleton request with POST method
|
63
|
+
response = TinyGrabber.post 'https://whoer.net/ru', params, headers2, { debug: true, read_timeout: read_timeout } # config also accepts :user_agent, :proxy, :basic_auth, :headers, :cookies
|
64
|
+
|
65
|
+
# Get Nokogiri object from response HTML
|
66
|
+
ng = response.ng
|
67
|
+
# Get HTTP response code
|
68
|
+
response.code
|
69
|
+
# Get response cookies
|
70
|
+
response.cookies
|
71
|
+
# Get response headers
|
72
|
+
response.headers
|
32
73
|
|
33
|
-
# Set headers
|
34
|
-
headers = { 'Content-Type' => 'application/json' }
|
35
|
-
|
36
|
-
# Set http(s)/socks4(5) proxy
|
37
|
-
# Proxy type by default is http. You can change it to socks, with setting params proxy_type equal socks
|
38
|
-
proxy = { ip: 'xx.xx.xx.xx', port: xx, proxy_type: :socks }
|
39
|
-
|
40
|
-
# Set Basic Authentication
|
41
|
-
auth = { username: '', password: '' }
|
42
|
-
|
43
|
-
# Set POST data
|
44
|
-
# Request HTTP type by default is GET. You can send POST request with setting post params. Also you cat send empty POST request.
|
45
|
-
post = { some_data: '' }
|
46
|
-
|
47
|
-
# HTTP GET request
|
48
|
-
response = TinyGrabber.get url, headers: headers, proxy: proxy, auth: auth
|
49
|
-
|
50
|
-
# HTTP GET request
|
51
|
-
response = TinyGrabber.post url, post, headers: headers, proxy: proxy, auth: auth
|
52
|
-
|
53
|
-
# HTTP answer code
|
54
|
-
p response.code
|
55
|
-
|
56
|
-
# HTTP content
|
57
|
-
p response.body
|
58
|
-
|
59
|
-
# Nokogiri object
|
60
|
-
p response.ng
|
61
|
-
|
62
|
-
# Response cookies
|
63
|
-
p response.cookies
|
64
74
|
```
|
65
75
|
|
66
76
|
## Changelog
|
67
77
|
|
78
|
+
* *v 0.2.0*
|
79
|
+
* A TinyGrabber object can now be created and configured (instance API)
|
80
|
+
* Change order of parameters for singleton request
|
81
|
+
* Add response cookies and headers
|
82
|
+
* Add debug flag for detailed logging and for saving the result HTML to a /log/*.html file
|
83
|
+
|
84
|
+
* *v 0.1.1*
|
85
|
+
* Save cookie in Redis
|
68
86
|
* *v 0.1.0*
|
69
87
|
* Add TinyGrabber.post method for HTTP POST request
|
70
88
|
* *v 0.0.7*
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# Net::HTTP agent for TinyGrabber
|
2
|
+
# Initialize connect with Resource
|
3
|
+
# Setting connect attributes
|
4
|
+
#
|
5
|
+
class TinyGrabber::Agent
|
6
|
+
# Debug flag: enables detailed logging and saves the result HTML to a file under log/
|
7
|
+
attr_accessor :debug
|
8
|
+
# Max time to execute request
|
9
|
+
attr_accessor :read_timeout
|
10
|
+
# Web browser name
|
11
|
+
attr_accessor :user_agent
|
12
|
+
# Remote proxy configuration
|
13
|
+
attr_accessor :proxy
|
14
|
+
# Basic authentication configuration
|
15
|
+
attr_accessor :basic_auth
|
16
|
+
# Headers
|
17
|
+
attr_accessor :headers
|
18
|
+
# Cookies
|
19
|
+
attr_accessor :cookies
|
20
|
+
|
21
|
+
# Initialization object
|
22
|
+
#
|
23
|
+
def initialize
|
24
|
+
@debug = false
|
25
|
+
|
26
|
+
# Initialize variables agent attributes
|
27
|
+
@user_agent = nil
|
28
|
+
@proxy = []
|
29
|
+
@basic_auth = {}
|
30
|
+
@headers = {}
|
31
|
+
@cookies = nil
|
32
|
+
@read_timeout = 10
|
33
|
+
# Initialize variable for URI object
|
34
|
+
@uri = nil
|
35
|
+
# Initialize variable for Net::HTTP request object
|
36
|
+
@http = Net::HTTP
|
37
|
+
# Initialize variable for Net::HTTP response object
|
38
|
+
@response = nil
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
# Set READ_TIMEOUT agent attribute
|
43
|
+
#
|
44
|
+
# @param read_timeout Waiting time to reading
|
45
|
+
#
|
46
|
+
def read_timeout= read_timeout
|
47
|
+
fail 'attribute read_timeout must be Integer' unless read_timeout.is_a?(Integer)
|
48
|
+
@read_timeout = read_timeout
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
# Set USER_AGENT agent attribute
|
53
|
+
#
|
54
|
+
# @param user_agent Web browser name
|
55
|
+
#
|
56
|
+
def user_agent= user_agent
|
57
|
+
fail 'attribute user_agent must be String' unless user_agent.is_a?(String)
|
58
|
+
@user_agent = user_agent
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
# Initialize Net::HTTP connection through proxy provider
|
63
|
+
# The :type key selects the proxy kind: SOCKS when it is 'socks' or :socks, HTTP(S) otherwise
|
64
|
+
#
|
65
|
+
# @param proxy Proxy configuration
|
66
|
+
#
|
67
|
+
def proxy= proxy
|
68
|
+
if proxy.is_a?(String)
|
69
|
+
ip, port = proxy.split(':')
|
70
|
+
fail 'attribute proxy must be in format ip:port' unless ip and port
|
71
|
+
proxy = { ip: ip, port: port }
|
72
|
+
end
|
73
|
+
fail 'attribute proxy must be Hash' unless proxy.is_a?(Hash)
|
74
|
+
fail 'attribute proxy must contain :ip and :port keys' unless proxy[:ip] and proxy[:port]
|
75
|
+
|
76
|
+
@proxy = proxy
|
77
|
+
if ['socks', :socks].include?(proxy[:type])
|
78
|
+
@http = Net::HTTP.SOCKSProxy(proxy[:ip], proxy[:port])
|
79
|
+
else
|
80
|
+
@http = Net::HTTP::Proxy(proxy[:ip], proxy[:port])
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
|
85
|
+
# Set BASIC_AUTH agent attribute
|
86
|
+
#
|
87
|
+
# @param basic_auth Authentication configuration (Hash with :username and :password)
|
88
|
+
#
|
89
|
+
def basic_auth= basic_auth
|
90
|
+
fail 'attribute basic_auth must be Hash' unless basic_auth.is_a?(Hash)
|
91
|
+
fail 'attribute basic_auth must contain :username and :password keys' unless basic_auth[:username] and basic_auth[:password]
|
92
|
+
@basic_auth = basic_auth
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
# Set HEADERS agent attribute
|
97
|
+
#
|
98
|
+
# @param headers Request headers
|
99
|
+
#
|
100
|
+
def headers= headers
|
101
|
+
fail 'attribute headers must be Hash' unless headers.is_a?(Hash)
|
102
|
+
@headers = headers
|
103
|
+
end
|
104
|
+
|
105
|
+
|
106
|
+
# Set COOKIES agent attribute
|
107
|
+
#
|
108
|
+
# @param cookies Request cookies
|
109
|
+
#
|
110
|
+
def cookies= cookies
|
111
|
+
fail 'attribute cookies must be String' unless cookies.is_a?(String)
|
112
|
+
@cookies = cookies
|
113
|
+
end
|
114
|
+
|
115
|
+
|
116
|
+
# Fetch request for GET and POST HTTP methods
|
117
|
+
# Setting USER_AGENT, BASIC_AUTH, HEADERS, COOKIES request attribute
|
118
|
+
# Send the request and, on a 2xx response, save HEADERS and COOKIES for subsequent requests
|
119
|
+
#
|
120
|
+
# @param url Resource link
|
121
|
+
# @param method Request method
|
122
|
+
# @param headers Request header
|
123
|
+
# @param params Request additional params
|
124
|
+
#
|
125
|
+
def fetch url, method = :get, headers = {}, params = {}
|
126
|
+
if @debug
|
127
|
+
p "#{debug_initial_word} =============================="
|
128
|
+
p "#{debug_initial_word} #{method.upcase} #{url}"
|
129
|
+
p "#{debug_initial_word} #{params}"
|
130
|
+
p "#{debug_initial_word} ------------------------------"
|
131
|
+
end
|
132
|
+
set_uri url
|
133
|
+
case method
|
134
|
+
when :get
|
135
|
+
@request = Net::HTTP::Get.new(@uri.request_uri)
|
136
|
+
when :post
|
137
|
+
@request = Net::HTTP::Post.new(@uri.request_uri)
|
138
|
+
@request.set_form_data(params)
|
139
|
+
end
|
140
|
+
set_user_agent if @user_agent
|
141
|
+
set_basic_auth unless @basic_auth.empty?
|
142
|
+
@headers = headers unless headers.empty?
|
143
|
+
set_headers unless @headers.empty?
|
144
|
+
set_cookies if @cookies
|
145
|
+
@response = send_request
|
146
|
+
case @response
|
147
|
+
# HTTP response code 1xx
|
148
|
+
when Net::HTTPInformation
|
149
|
+
# HTTP response code 2xx
|
150
|
+
when Net::HTTPSuccess
|
151
|
+
save_headers if @response.header
|
152
|
+
save_cookies if @response.cookies
|
153
|
+
# HTTP response code 3xx
|
154
|
+
when Net::HTTPRedirection
|
155
|
+
# HTTP response code 4xx
|
156
|
+
when Net::HTTPClientError
|
157
|
+
# HTTP response code 5xx
|
158
|
+
when Net::HTTPServerError
|
159
|
+
end
|
160
|
+
if @debug
|
161
|
+
debug_filename = "log/#{method.upcase}_#{@uri.to_s.gsub(/[\/:]/, '_').gsub(/_+/, '_')}"
|
162
|
+
File.open(debug_filename, 'wb') { |f| f << @response.body } if @debug
|
163
|
+
p "#{debug_initial_word} HTML > #{debug_filename}"
|
164
|
+
end
|
165
|
+
@response
|
166
|
+
end
|
167
|
+
|
168
|
+
|
169
|
+
# Initialize URI object from request url
|
170
|
+
#
|
171
|
+
# @param url Request link
|
172
|
+
#
|
173
|
+
def set_uri url
|
174
|
+
# Unescape first, then escape again, so that raw and already-escaped URLs both end up escaped exactly once
|
175
|
+
@uri = URI(URI.escape(URI.unescape(url)))
|
176
|
+
p "#{debug_initial_word} URI = #{@uri}" if @debug
|
177
|
+
end
|
178
|
+
|
179
|
+
|
180
|
+
# Set USER_AGENT request attribute
|
181
|
+
#
|
182
|
+
def set_user_agent
|
183
|
+
@headers['User-Agent'] = @user_agent
|
184
|
+
p "#{debug_initial_word} user_agent = #{@user_agent}" if @debug
|
185
|
+
end
|
186
|
+
|
187
|
+
|
188
|
+
# Set BASIC_AUTH request authentication
|
189
|
+
#
|
190
|
+
def set_basic_auth
|
191
|
+
@request.basic_auth @basic_auth[:username], @basic_auth[:password]
|
192
|
+
p "#{debug_initial_word} basic_auth = #{@basic_auth}" if @debug
|
193
|
+
end
|
194
|
+
|
195
|
+
|
196
|
+
# Set request HEADERS
|
197
|
+
#
|
198
|
+
def set_headers
|
199
|
+
@headers.each { |k, v| @request.add_field(String(k), v) }
|
200
|
+
p "#{debug_initial_word} headers = #{@headers}" if @debug
|
201
|
+
end
|
202
|
+
|
203
|
+
|
204
|
+
# Set request COOKIES
|
205
|
+
#
|
206
|
+
def set_cookies
|
207
|
+
@request['Cookie'] = @cookies
|
208
|
+
p "#{debug_initial_word} cookies = #{@cookies}" if @debug
|
209
|
+
end
|
210
|
+
|
211
|
+
|
212
|
+
# Send request and get response
|
213
|
+
# Use SSL connect for HTTPS link scheme
|
214
|
+
#
|
215
|
+
def send_request
|
216
|
+
@http.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https') do |http|
|
217
|
+
http.read_timeout = @read_timeout
|
218
|
+
p "#{debug_initial_word} read_timeout = #{@read_timeout}" if @debug
|
219
|
+
http.request(@request)
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
|
224
|
+
# Save response headers in agent attribute
|
225
|
+
#
|
226
|
+
def save_headers
|
227
|
+
@headers = @response.headers
|
228
|
+
p "#{debug_initial_word} save_headers = #{@headers}" if @debug
|
229
|
+
end
|
230
|
+
|
231
|
+
|
232
|
+
# Save response cookies in agent attribute
|
233
|
+
#
|
234
|
+
def save_cookies
|
235
|
+
@cookies = @response.cookies
|
236
|
+
p "#{debug_initial_word} save_cookies = #{@cookies}" if @debug
|
237
|
+
end
|
238
|
+
|
239
|
+
|
240
|
+
# Tiny grabber initial word for debug
|
241
|
+
#
|
242
|
+
def debug_initial_word
|
243
|
+
"TG | #{Time.now.strftime('%Y%m%d-%H%M%S')} |"
|
244
|
+
end
|
245
|
+
end
|
data/lib/tiny_grabber/http.rb
CHANGED
@@ -14,7 +14,19 @@ module Net
|
|
14
14
|
# Response Cookies
|
15
15
|
#
|
16
16
|
def cookies
|
17
|
-
self.get_fields('set-cookie')
|
17
|
+
cookies = self.get_fields('set-cookie')
|
18
|
+
if cookies
|
19
|
+
cookies.map { |cookie| cookie.gsub(/\A([^;]+).*\Z/, '\1') }.join('&')
|
20
|
+
else
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
# Response Headers
|
27
|
+
#
|
28
|
+
def headers
|
29
|
+
self.header.to_hash.inject({}) { |headers, (header_key, header_value)| headers[header_key] = header_value.first; headers }
|
18
30
|
end
|
19
31
|
end
|
20
32
|
end
|
data/lib/tiny_grabber/version.rb
CHANGED
data/lib/tiny_grabber.rb
CHANGED
@@ -1,147 +1,179 @@
|
|
1
|
-
require
|
1
|
+
require 'tiny_grabber/version'
|
2
|
+
require 'tiny_grabber/agent'
|
2
3
|
|
3
4
|
require 'uri'
|
4
5
|
require 'net/http'
|
5
6
|
require 'socksify/http'
|
6
7
|
require 'tiny_grabber/http'
|
8
|
+
require 'redis'
|
7
9
|
|
8
10
|
# Main class for TinyGrabber
|
11
|
+
#
|
9
12
|
class TinyGrabber
|
10
13
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
14
|
+
# Initialize a new TinyGrabber user agent.
|
15
|
+
#
|
16
|
+
def initialize
|
17
|
+
@agent = TinyGrabber::Agent.new
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
# Singleton > Initialize a new TinyGrabber user agent.
|
22
|
+
#
|
23
|
+
def self.initialize config = {}
|
24
|
+
@agent = TinyGrabber::Agent.new
|
25
|
+
|
26
|
+
@agent.debug = config[:debug] if config[:debug]
|
27
|
+
@agent.read_timeout = config[:read_timeout] if config[:read_timeout]
|
28
|
+
@agent.user_agent = config[:user_agent] if config[:user_agent]
|
29
|
+
@agent.proxy = config[:proxy] if config[:proxy]
|
30
|
+
@agent.basic_auth = config[:basic_auth] if config[:basic_auth]
|
31
|
+
@agent.headers = config[:headers] if config[:headers]
|
32
|
+
@agent.cookies = config[:cookies] if config[:cookies]
|
33
|
+
end
|
34
|
+
|
15
35
|
|
16
|
-
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
# * [proxy (hash)] Configuration of remote proxy server
|
21
|
-
# * *ip* address of remote proxy server
|
22
|
-
# * *port* of remote proxy server
|
23
|
-
# * *proxy_type* of remote proxy server
|
24
|
-
# * *http* for http(s) proxy servers (by default)
|
25
|
-
# * *socks* for socks4/5 proxy servers
|
26
|
-
# * [headers (hash)] Headers
|
27
|
-
# * [auth (hash)] Basic Authentication
|
28
|
-
# * *username* Authenticate username
|
29
|
-
# * *password* Authenticate password
|
36
|
+
# HTTP::GET request
|
37
|
+
#
|
38
|
+
# @param url Resource link
|
39
|
+
# @param headers Request header
|
30
40
|
#
|
31
|
-
|
32
|
-
|
41
|
+
def get url, headers = {}
|
42
|
+
@agent.fetch url, :get, headers
|
43
|
+
end
|
44
|
+
|
45
|
+
# Singleton > HTTP::GET request
|
33
46
|
#
|
34
|
-
# @param url
|
35
|
-
# @param
|
47
|
+
# @param url Resource link
|
48
|
+
# @param headers Request header
|
36
49
|
#
|
37
|
-
def self.get url,
|
38
|
-
|
39
|
-
@
|
40
|
-
@http = set_http_connect
|
41
|
-
@request = Net::HTTP::Get.new(@uri.request_uri)
|
42
|
-
set_basic_auth
|
43
|
-
set_headers
|
44
|
-
get_request
|
50
|
+
def self.get url, headers = {}, config = {}
|
51
|
+
initialize config
|
52
|
+
@agent.fetch url, :get, headers
|
45
53
|
end
|
46
54
|
|
47
55
|
|
48
|
-
|
49
|
-
# Get Net::HTTP object for content from remote single page
|
50
|
-
# [url (string)] Link to resource
|
51
|
-
# [data (hash)] Post data
|
52
|
-
# [params (hash)] Request params
|
53
|
-
# * [proxy (hash)] Configuration of remote proxy server
|
54
|
-
# * *ip* address of remote proxy server
|
55
|
-
# * *port* of remote proxy server
|
56
|
-
# * *proxy_type* of remote proxy server
|
57
|
-
# * *http* for http(s) proxy servers (by default)
|
58
|
-
# * *socks* for socks4/5 proxy servers
|
59
|
-
# * [headers (hash)] Headers
|
60
|
-
# * [auth (hash)] Basic Authentication
|
61
|
-
# * *username* Authenticate username
|
62
|
-
# * *password* Authenticate password
|
56
|
+
# HTTP::POST request
|
63
57
|
#
|
64
|
-
#
|
65
|
-
#
|
58
|
+
# @param url Resource link
|
59
|
+
# @param params Request post data
|
60
|
+
# @param headers Request header
|
66
61
|
#
|
67
|
-
|
68
|
-
|
62
|
+
def post url, params = {}, headers = {}, config = {}
|
63
|
+
@agent.fetch url, :post, headers, params
|
64
|
+
end
|
65
|
+
|
66
|
+
|
67
|
+
# Singleton > HTTP::POST request
|
68
|
+
#
|
69
|
+
# @param url Resource link
|
70
|
+
# @param headers Request header
|
69
71
|
#
|
70
|
-
def self.post url,
|
71
|
-
|
72
|
-
|
73
|
-
@params = convert_params_to_sym params
|
74
|
-
@http = set_http_connect
|
75
|
-
@request = Net::HTTP::Post.new(@uri.request_uri)
|
76
|
-
@request.set_form_data(data)
|
77
|
-
set_basic_auth
|
78
|
-
set_headers
|
79
|
-
get_request
|
72
|
+
def self.post url, params = {}, headers = {}, config = {}
|
73
|
+
initialize config
|
74
|
+
@agent.fetch url, :post, headers, params
|
80
75
|
end
|
81
76
|
|
82
|
-
private
|
83
77
|
|
84
|
-
#
|
78
|
+
# Set DEBUG flag
|
85
79
|
#
|
86
|
-
# @param
|
80
|
+
# @param debug Flag to start debug
|
87
81
|
#
|
88
|
-
def
|
89
|
-
|
82
|
+
def debug= debug
|
83
|
+
@agent.debug = debug
|
90
84
|
end
|
91
85
|
|
92
|
-
|
86
|
+
|
87
|
+
# Read READ_TIMEOUT agent attribute
|
93
88
|
#
|
94
|
-
|
89
|
+
def read_timeout
|
90
|
+
@agent.read_timeout
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
# Set READ_TIMEOUT agent attribute
|
95
|
+
#
|
96
|
+
# @param read_timeout Waiting time to reading
|
95
97
|
#
|
96
|
-
def
|
97
|
-
|
98
|
+
def read_timeout= read_timeout
|
99
|
+
@agent.read_timeout = read_timeout
|
98
100
|
end
|
99
101
|
|
100
102
|
|
101
|
-
#
|
103
|
+
# Read USER_AGENT agent attribute
|
102
104
|
#
|
103
|
-
def
|
104
|
-
|
105
|
-
if ['socks', :socks].include?(@params[:proxy][:proxy_type])
|
106
|
-
Net::HTTP.SOCKSProxy(@params[:proxy][:ip], @params[:proxy][:port])
|
107
|
-
else
|
108
|
-
Net::HTTP::Proxy(@params[:proxy][:ip], @params[:proxy][:port])
|
109
|
-
end
|
110
|
-
else
|
111
|
-
Net::HTTP
|
112
|
-
end
|
105
|
+
def user_agent
|
106
|
+
@agent.user_agent
|
113
107
|
end
|
114
108
|
|
115
109
|
|
116
|
-
# Set
|
110
|
+
# Set USER_AGENT agent attribute
|
117
111
|
#
|
118
|
-
# @param
|
119
|
-
# @param params Request params
|
112
|
+
# @param user_agent Web browser name
|
120
113
|
#
|
121
|
-
def
|
122
|
-
|
123
|
-
@request.basic_auth @params[:auth][:username], @params[:auth][:password]
|
124
|
-
end
|
114
|
+
def user_agent= user_agent
|
115
|
+
@agent.user_agent = user_agent
|
125
116
|
end
|
126
117
|
|
127
118
|
|
128
|
-
#
|
119
|
+
# Read PROXY agent attribute
|
129
120
|
#
|
130
|
-
|
131
|
-
|
121
|
+
def proxy
|
122
|
+
@agent.proxy
|
123
|
+
end
|
124
|
+
|
125
|
+
|
126
|
+
# Set PROXY agent attribute
|
132
127
|
#
|
133
|
-
|
134
|
-
|
128
|
+
# @param proxy Proxy configuration
|
129
|
+
#
|
130
|
+
def proxy= proxy
|
131
|
+
@agent.proxy = proxy
|
132
|
+
end
|
133
|
+
|
134
|
+
|
135
|
+
# Set BASIC_AUTH agent attribute
|
136
|
+
#
|
137
|
+
# @param username Authentication username
|
138
|
+
# @param password Authentication password
|
139
|
+
#
|
140
|
+
def basic_auth username, password
|
141
|
+
@agent.basic_auth = { username: username, password: password }
|
142
|
+
end
|
143
|
+
|
144
|
+
|
145
|
+
# Read HEADERS agent attribute
|
146
|
+
#
|
147
|
+
def headers
|
148
|
+
@agent.headers
|
149
|
+
end
|
150
|
+
|
151
|
+
|
152
|
+
# Set HEADERS agent attribute
|
153
|
+
#
|
154
|
+
# @param headers Request headers
|
155
|
+
#
|
156
|
+
def headers= headers
|
157
|
+
@agent.headers = headers
|
135
158
|
end
|
136
159
|
|
137
160
|
|
138
|
-
#
|
161
|
+
# Read COOKIES agent attribute
|
139
162
|
#
|
140
|
-
|
141
|
-
|
163
|
+
def cookies
|
164
|
+
@agent.cookies
|
165
|
+
end
|
166
|
+
|
167
|
+
|
168
|
+
# Set COOKIES agent attribute
|
169
|
+
#
|
170
|
+
# @param cookies Request cookies
|
142
171
|
#
|
143
|
-
def
|
144
|
-
@
|
172
|
+
def cookies= cookies
|
173
|
+
@agent.cookies = cookies
|
145
174
|
end
|
146
175
|
|
176
|
+
|
177
|
+
|
178
|
+
|
147
179
|
end
|
data/tiny_grabber.gemspec
CHANGED
@@ -20,10 +20,11 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.required_ruby_version = '~> 2.3.0'
|
22
22
|
|
23
|
-
spec.
|
24
|
-
spec.
|
23
|
+
spec.add_runtime_dependency 'socksify', '~> 1.7'
|
24
|
+
spec.add_runtime_dependency 'nokogiri', '~> 1.6'
|
25
|
+
spec.add_runtime_dependency 'redis', '~> 3.3'
|
25
26
|
|
26
|
-
spec.add_development_dependency 'bundler', '~> 1.11'
|
27
27
|
spec.add_development_dependency 'rake', '~> 10.0'
|
28
|
+
spec.add_development_dependency 'bundler', '~> 1.11'
|
28
29
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
29
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny_grabber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksandr Chernyshev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-06-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: socksify
|
@@ -39,19 +39,19 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.6'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: redis
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
48
|
-
type: :
|
47
|
+
version: '3.3'
|
48
|
+
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '3.3'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: bundler
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.11'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.11'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rspec
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,9 +112,9 @@ files:
|
|
98
112
|
- bin/console
|
99
113
|
- bin/setup
|
100
114
|
- lib/tiny_grabber.rb
|
115
|
+
- lib/tiny_grabber/agent.rb
|
101
116
|
- lib/tiny_grabber/http.rb
|
102
117
|
- lib/tiny_grabber/version.rb
|
103
|
-
- tiny_grabber-0.0.2.gem
|
104
118
|
- tiny_grabber.gemspec
|
105
119
|
homepage: https://github.com/moroznoeytpo/tiny_grabber
|
106
120
|
licenses:
|
data/tiny_grabber-0.0.2.gem
DELETED
Binary file
|