http_crawler 0.3.2.3 → 0.3.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/http_crawler/client.rb +14 -0
- data/lib/http_crawler/version.rb +1 -1
- data/lib/http_crawler/web/client.rb +0 -1
- data/lib/http_crawler/web/{baidu → httpbin}/README.md +2 -2
- data/lib/http_crawler/web/httpbin/client.rb +25 -0
- data/lib/http_crawler/web/{baidu → httpbin}/response.rb +1 -1
- data/lib/http_crawler/web/httpbin/response/ip.rb +31 -0
- metadata +6 -6
- data/lib/http_crawler/web/baidu/client.rb +0 -35
- data/lib/http_crawler/web/baidu/response/index.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3ccf1f8dd94a975491c31e136eaaa74bd15fe2588d047a653847ee4f5899bdfb
+  data.tar.gz: d0669d6313fe3aa10c1eb8c9bafeb2707ca3a081de79b11171bb99e81657143d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 176cbcffb613e41738ff51246fbf13270d19190498ecd933567d4610edc561760657ebfa6c5620ef2982a32374d7ea0a113b1d97eb8a63d584fd5f995abc05e1
+  data.tar.gz: 3a74d6abb7ef3577a1962614d79991290a90fc60b6be7654b730dec2b13553ce087a1544cf9091b2006cd6a07633290de08f65f3930fc7f9a69b7dd66848f02d
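The SHA256/SHA512 digests above cover the metadata.gz and data.tar.gz entries inside the published .gem archive. To check a local copy against them, a minimal sketch using Ruby's standard Digest library (the file names assume the .gem tar archive has already been unpacked):

    require 'digest'

    # Hypothetical paths: metadata.gz and data.tar.gz extracted from the .gem archive.
    %w[metadata.gz data.tar.gz].each do |name|
      puts "SHA256 #{name}: #{Digest::SHA256.file(name).hexdigest}"
      puts "SHA512 #{name}: #{Digest::SHA512.file(name).hexdigest}"
    end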
data/lib/http_crawler/client.rb
CHANGED
@@ -204,6 +204,20 @@ module HttpCrawler
     def init_cookies
       @cookies = {}
     end
+
+    # Created: 2020/4/7 16:54
+    # Updated: 2020/4/7
+    # Author: Jagger
+    # Method name: remove_traces
+    # Description: clear traces
+    # Usage: #remove_traces
+    #
+    # @return
+    #
+    def remove_traces
+      @response = nil
+      self.init_cookies
+    end
 
     # Created: 2019/9/16 17:13
     # Updated: 2019/9/16
data/lib/http_crawler/web/httpbin/client.rb
ADDED
@@ -0,0 +1,25 @@
+
+module HttpCrawler
+  module Web
+    module Httpbin
+      class Client < HttpCrawler::Web::Client
+
+        def init_client
+          # Set the overall timeout to 3 seconds
+          @all_timeout = 3
+        end
+
+        def init_uri
+          @uri = URI("http://httpbin.org/")
+        end
+
+        def ip(parameter = {})
+          r = get("ip")
+          r.extend(HttpCrawler::Web::Httpbin::Response::Ip)
+        end
+
+      end
+    end # module Httpbin
+  end # module Web
+end # module HttpCrawler
+
data/lib/http_crawler/web/httpbin/response/ip.rb
ADDED
@@ -0,0 +1,31 @@
+# Query
+module HttpCrawler
+  module Web
+    module Httpbin
+      module Response
+        module Ip
+
+          # Created: 2019/4/28 21:03
+          # Author: Jagger
+          # Method name: parsing
+          # Description: parse the data
+          # Usage: #results
+          #
+          # @option parameter [Hash] parameters passed as a Hash
+          # @param parameter [Hash]
+          #   {
+          #     "": , # parameter description
+          #   }
+          #
+          # @return JSON
+          #
+          def parsing(parameter = {})
+            self.json
+          end
+        end # module Ip
+      end # module Response
+    end # module Httpbin
+  end # module Web
+end # module HttpCrawler
+
+
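Together, the new Httpbin client and its Response::Ip module replace the removed Baidu example: #ip issues a GET to http://httpbin.org/ip and extends the returned response with #parsing, which delegates to the response's #json helper. A minimal sketch, again assuming argument-free construction:

    client = HttpCrawler::Web::Httpbin::Client.new
    response = client.ip
    puts response.parsing  # the parsed JSON body, e.g. {"origin" => "<caller IP>"}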
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: http_crawler
 version: !ruby/object:Gem::Version
-  version: 0.3.2.3
+  version: 0.3.2.4
 platform: ruby
 authors:
 - jagger
 autorequire:
 bindir: exe
 cert_chain: []
-date:
+date: 2020-04-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -153,11 +153,11 @@ files:
 - lib/http_crawler/version.rb
 - lib/http_crawler/web.rb
 - lib/http_crawler/web/README.md
-- lib/http_crawler/web/baidu/README.md
-- lib/http_crawler/web/baidu/client.rb
-- lib/http_crawler/web/baidu/response.rb
-- lib/http_crawler/web/baidu/response/index.rb
 - lib/http_crawler/web/client.rb
+- lib/http_crawler/web/httpbin/README.md
+- lib/http_crawler/web/httpbin/client.rb
+- lib/http_crawler/web/httpbin/response.rb
+- lib/http_crawler/web/httpbin/response/ip.rb
 homepage: https://rubygems.org/gems/http_crawler
 licenses:
 - MIT
data/lib/http_crawler/web/baidu/client.rb
DELETED
@@ -1,35 +0,0 @@
-
-module HttpCrawler
-  module Web
-    module Baidu
-      class Client < HttpCrawler::Web::Client
-
-        def init_client
-          # Set the overall timeout to 3 seconds
-          @all_timeout = 3
-        end
-
-        def init_uri
-          @uri = URI("https://www.baidu.com")
-        end
-
-        def index(parameter = {})
-          r = get("/")
-          r.extend(HttpCrawler::Web::Baidu::Response::Index)
-        end
-
-        def search(parameter = {})
-          raise "parameter[:keyword] cannot be empty" unless parameter[:keyword]
-          params = {
-            "wd": parameter[:keyword]
-          }
-          r = get("/s", params)
-          r
-        end
-
-      end
-    end # module Baidu
-  end # module Web
-end # module HttpCrawler
-
-load File.dirname(__FILE__) + '/response/index.rb'
data/lib/http_crawler/web/baidu/response/index.rb
DELETED
@@ -1,16 +0,0 @@
-# Query
-module HttpCrawler
-  module Web
-    module Baidu
-      module Response
-        module Index
-          def parsing(parameter = {})
-            html
-          end
-        end # module Index
-      end # module Response
-    end # module Baidu
-  end # module Web
-end # module HttpCrawler
-
-