http_crawler 0.3.1.4 → 0.3.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/http_crawler/client.rb +13 -5
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7976a9fa4410e543a29dcbf761b79c2aa2b73f89604b7108daab863e3bf76d0
|
4
|
+
data.tar.gz: 349418d39a8346ac6844f3600541b982a6d7cbef445cd565b15ef2e8ef8cac2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 932954f758d3136a124ea0b80da7bf1198cb8b86f0e1c52c1a838a119a307ad72bc572e78200b5d9e7ca25e4e3b8f2e645da4d1923e60b5f59294fa323ad5f94
|
7
|
+
data.tar.gz: e3b5598ddab7ed79bc18b445a120f0d91466fa701599339c480d5f882c4c305fc4aae966af5c64bbcfbfe989c6eee14494895fd89e512b710dede2705b086ea0
|
data/lib/http_crawler/client.rb
CHANGED
@@ -82,10 +82,12 @@ module HttpCrawler
|
|
82
82
|
attr_accessor :header
|
83
83
|
# 头文件相关方法
|
84
84
|
def header(parameter = {})
|
85
|
+
parameter = parameter.symbolize_keys
|
85
86
|
@header ||= init_header
|
86
87
|
end
|
87
88
|
|
88
89
|
def init_header(parameter = {})
|
90
|
+
parameter = parameter.symbolize_keys
|
89
91
|
@header = {
|
90
92
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
91
93
|
"Accept-Encoding": "gzip, br",
|
@@ -97,20 +99,24 @@ module HttpCrawler
|
|
97
99
|
end
|
98
100
|
|
99
101
|
def update_header(parameter = {})
|
102
|
+
parameter = parameter.symbolize_keys
|
100
103
|
@header = init_header
|
101
104
|
end
|
102
105
|
|
103
106
|
attr_accessor :cookies
|
104
107
|
# cookies相关方法
|
105
108
|
def cookies(parameter = {})
|
109
|
+
parameter = parameter.symbolize_keys
|
106
110
|
@cookies ||= init_cookies
|
107
111
|
end
|
108
112
|
|
109
113
|
def init_cookies(parameter = {})
|
114
|
+
parameter = parameter.symbolize_keys
|
110
115
|
@cookies = {}
|
111
116
|
end
|
112
117
|
|
113
118
|
def update_cookies(parameter = {})
|
119
|
+
parameter = parameter.symbolize_keys
|
114
120
|
nil
|
115
121
|
end
|
116
122
|
|
@@ -136,10 +142,11 @@ module HttpCrawler
|
|
136
142
|
|
137
143
|
# 调用代理 api使用的参数
|
138
144
|
def proxy_params
|
139
|
-
@proxy_params ||= {
|
145
|
+
@proxy_params ||= {key: "default"}
|
140
146
|
end
|
141
147
|
|
142
148
|
def update_proxy(proxy = {})
|
149
|
+
proxy = proxy.symbolize_keys
|
143
150
|
if (proxy.blank?)
|
144
151
|
@proxy = get_proxy
|
145
152
|
else
|
@@ -167,7 +174,7 @@ module HttpCrawler
|
|
167
174
|
begin
|
168
175
|
Rails.logger.debug("开始获取代理IP")
|
169
176
|
proxy_client = HttpCrawler::Proxy.for(proxy_api)
|
170
|
-
proxy_r = proxy_client.get_proxy(proxy_params)
|
177
|
+
proxy_r = proxy_client.get_proxy(proxy_params.symbolize_keys)
|
171
178
|
proxy_ip = proxy_r.results unless proxy_r.results.blank?
|
172
179
|
if proxy_ip.blank?
|
173
180
|
Rails.logger.warn "无最新代理等待5秒后重新获取:proxy 为空"
|
@@ -176,10 +183,10 @@ module HttpCrawler
|
|
176
183
|
end
|
177
184
|
sleep(5)
|
178
185
|
end while true
|
179
|
-
|
186
|
+
proxy_ip = proxy_ip.symbolize_keys
|
180
187
|
Rails.logger.debug("当前IP => #{@proxy},获取最新代理 => #{proxy_ip}")
|
181
188
|
|
182
|
-
unless proxy_ip[
|
189
|
+
unless proxy_ip[:p_addr] && proxy_ip[:p_port]
|
183
190
|
Rails.logger.warn "无最新代理等待5秒后重新获取:p_addr 或 p_port 为空"
|
184
191
|
sleep(5)
|
185
192
|
proxy_ip = get_proxy
|
@@ -211,7 +218,7 @@ module HttpCrawler
|
|
211
218
|
h = HTTP.follow(max_hops: 5)
|
212
219
|
|
213
220
|
# 添加代理
|
214
|
-
h = h.via(@proxy[
|
221
|
+
h = h.via(@proxy[:p_addr], @proxy[:p_port].to_i, @proxy[:p_user], @proxy[:p_pass]) unless (@proxy.blank?)
|
215
222
|
|
216
223
|
# 添加头文件
|
217
224
|
h = h.headers(header) if header
|
@@ -236,6 +243,7 @@ module HttpCrawler
|
|
236
243
|
# 继承类需要重定义 init_uri
|
237
244
|
#
|
238
245
|
def initialize(parameter = {})
|
246
|
+
parameter = parameter.symbolize_keys
|
239
247
|
# 初始化 uri
|
240
248
|
init_uri
|
241
249
|
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1.4
|
4
|
+
version: 0.3.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-03-
|
11
|
+
date: 2019-03-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|