http_crawler 0.3.1.4 → 0.3.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e5fd1f93c080268b8ff29be71b4e0d7557fb6e489eceba9a342945997119b77e
4
- data.tar.gz: 07ea9e9e7badc4f84275c306371e9ce8cd5ef6c855ba57e84141183a763dc2dd
3
+ metadata.gz: a7976a9fa4410e543a29dcbf761b79c2aa2b73f89604b7108daab863e3bf76d0
4
+ data.tar.gz: 349418d39a8346ac6844f3600541b982a6d7cbef445cd565b15ef2e8ef8cac2c
5
5
  SHA512:
6
- metadata.gz: d174a147cad9851e140804d5f40383d536a3c1b1389d0d181e0e73d76b736ed4d6a5e8ebb0c1d6541abae37bf23fd1ef4340d085a9608f4d52977cc22a1cdeb3
7
- data.tar.gz: e19bb87187a80c041f8884d2035d956c2dbe3aaa8e1ed1591036f8259f48847f5a3f36fc43b4a65ac3bbf713969091c3e8ddec0492a766129f2457a2969ff84f
6
+ metadata.gz: 932954f758d3136a124ea0b80da7bf1198cb8b86f0e1c52c1a838a119a307ad72bc572e78200b5d9e7ca25e4e3b8f2e645da4d1923e60b5f59294fa323ad5f94
7
+ data.tar.gz: e3b5598ddab7ed79bc18b445a120f0d91466fa701599339c480d5f882c4c305fc4aae966af5c64bbcfbfe989c6eee14494895fd89e512b710dede2705b086ea0
@@ -82,10 +82,12 @@ module HttpCrawler
82
82
  attr_accessor :header
83
83
  # 头文件相关方法
84
84
  def header(parameter = {})
85
+ parameter = parameter.symbolize_keys
85
86
  @header ||= init_header
86
87
  end
87
88
 
88
89
  def init_header(parameter = {})
90
+ parameter = parameter.symbolize_keys
89
91
  @header = {
90
92
  "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
91
93
  "Accept-Encoding": "gzip, br",
@@ -97,20 +99,24 @@ module HttpCrawler
97
99
  end
98
100
 
99
101
  def update_header(parameter = {})
102
+ parameter = parameter.symbolize_keys
100
103
  @header = init_header
101
104
  end
102
105
 
103
106
  attr_accessor :cookies
104
107
  # cookies相关方法
105
108
  def cookies(parameter = {})
109
+ parameter = parameter.symbolize_keys
106
110
  @cookies ||= init_cookies
107
111
  end
108
112
 
109
113
  def init_cookies(parameter = {})
114
+ parameter = parameter.symbolize_keys
110
115
  @cookies = {}
111
116
  end
112
117
 
113
118
  def update_cookies(parameter = {})
119
+ parameter = parameter.symbolize_keys
114
120
  nil
115
121
  end
116
122
 
@@ -136,10 +142,11 @@ module HttpCrawler
136
142
 
137
143
  # 调用代理 api使用的参数
138
144
  def proxy_params
139
- @proxy_params ||= {"key": "default"}
145
+ @proxy_params ||= {key: "default"}
140
146
  end
141
147
 
142
148
  def update_proxy(proxy = {})
149
+ proxy = proxy.symbolize_keys
143
150
  if (proxy.blank?)
144
151
  @proxy = get_proxy
145
152
  else
@@ -167,7 +174,7 @@ module HttpCrawler
167
174
  begin
168
175
  Rails.logger.debug("开始获取代理IP")
169
176
  proxy_client = HttpCrawler::Proxy.for(proxy_api)
170
- proxy_r = proxy_client.get_proxy(proxy_params)
177
+ proxy_r = proxy_client.get_proxy(proxy_params.symbolize_keys)
171
178
  proxy_ip = proxy_r.results unless proxy_r.results.blank?
172
179
  if proxy_ip.blank?
173
180
  Rails.logger.warn "无最新代理等待5秒后重新获取:proxy 为空"
@@ -176,10 +183,10 @@ module HttpCrawler
176
183
  end
177
184
  sleep(5)
178
185
  end while true
179
-
186
+ proxy_ip = proxy_ip.symbolize_keys
180
187
  Rails.logger.debug("当前IP => #{@proxy},获取最新代理 => #{proxy_ip}")
181
188
 
182
- unless proxy_ip["p_addr"] && proxy_ip["p_port"]
189
+ unless proxy_ip[:p_addr] && proxy_ip[:p_port]
183
190
  Rails.logger.warn "无最新代理等待5秒后重新获取:p_addr 或 p_port 为空"
184
191
  sleep(5)
185
192
  proxy_ip = get_proxy
@@ -211,7 +218,7 @@ module HttpCrawler
211
218
  h = HTTP.follow(max_hops: 5)
212
219
 
213
220
  # 添加代理
214
- h = h.via(@proxy["p_addr"], @proxy["p_port"].to_i, @proxy["p_user"], @proxy["p_pass"]) unless (@proxy.blank?)
221
+ h = h.via(@proxy[:p_addr], @proxy[:p_port].to_i, @proxy[:p_user], @proxy[:p_pass]) unless (@proxy.blank?)
215
222
 
216
223
  # 添加头文件
217
224
  h = h.headers(header) if header
@@ -236,6 +243,7 @@ module HttpCrawler
236
243
  # 继承类需要重定义 init_uri
237
244
  #
238
245
  def initialize(parameter = {})
246
+ parameter = parameter.symbolize_keys
239
247
  # 初始化 uri
240
248
  init_uri
241
249
 
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.1.4"
2
+ VERSION = "0.3.1.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1.4
4
+ version: 0.3.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-01 00:00:00.000000000 Z
11
+ date: 2019-03-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec