list_spider 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +41 -0
- data/lib/list_spider.rb +19 -19
- data/lib/list_spider/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ef1d9cd5edd321718137a9b9055b6e223fb870f1209271df4142930c35dd6f5
|
4
|
+
data.tar.gz: 983055db2e6714337f4f76ec3d279a83f46d48e689be0334c9a944bce197599c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcccc6c7761931c93bd729e58a844e127d9ac7a63075f8163c92083f7749df2e978cc745071a2c319bcc48825d2dd53a2e043e97a02329884168cc50d21f7975
|
7
|
+
data.tar.gz: 33cd0806b64df7e88296a9ff8ce0a3b233f6702df9cdb4c87938c7cc0b64cde2d096970903a3242c228c308d7fc2d8a35f4eff85d2262e5034de83bf7d009e85
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
list_spider (2.0.0)
|
5
|
+
em-http-request (~> 1.1, >= 1.1.3)
|
6
|
+
nokogiri (~> 1.6, >= 1.6.7)
|
7
|
+
rchardet (~> 1.6, >= 1.6.1)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
addressable (2.5.2)
|
13
|
+
public_suffix (>= 2.0.2, < 4.0)
|
14
|
+
cookiejar (0.3.3)
|
15
|
+
em-http-request (1.1.5)
|
16
|
+
addressable (>= 2.3.4)
|
17
|
+
cookiejar (!= 0.3.1)
|
18
|
+
em-socksify (>= 0.3)
|
19
|
+
eventmachine (>= 1.0.3)
|
20
|
+
http_parser.rb (>= 0.6.0)
|
21
|
+
em-socksify (0.3.2)
|
22
|
+
eventmachine (>= 1.0.0.beta.4)
|
23
|
+
eventmachine (1.2.5)
|
24
|
+
http_parser.rb (0.6.0)
|
25
|
+
mini_portile2 (2.3.0)
|
26
|
+
nokogiri (1.8.2)
|
27
|
+
mini_portile2 (~> 2.3.0)
|
28
|
+
public_suffix (3.0.2)
|
29
|
+
rake (10.5.0)
|
30
|
+
rchardet (1.7.0)
|
31
|
+
|
32
|
+
PLATFORMS
|
33
|
+
ruby
|
34
|
+
|
35
|
+
DEPENDENCIES
|
36
|
+
bundler (~> 1.16)
|
37
|
+
list_spider!
|
38
|
+
rake (~> 10.0)
|
39
|
+
|
40
|
+
BUNDLED WITH
|
41
|
+
1.16.1
|
data/lib/list_spider.rb
CHANGED
@@ -14,7 +14,7 @@ class TaskStruct
|
|
14
14
|
http_method: :get,
|
15
15
|
custom_data: nil, # 自定义数据
|
16
16
|
parse_method: nil, # 解析保存文件的回调,参数是TaskStruct对象本身
|
17
|
-
# 请求成功后的回调,此时可能没有保存文件,比如301,
|
17
|
+
# 请求成功后的回调,此时可能没有保存文件,比如301,404
|
18
18
|
# 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
|
19
19
|
# http.response_header.status 状态码
|
20
20
|
# http.response_header 返回头
|
@@ -28,9 +28,9 @@ class TaskStruct
|
|
28
28
|
overwrite_exist: false, # 是否覆盖现有文件
|
29
29
|
# request options
|
30
30
|
redirects: 3, # 重定向次数
|
31
|
-
|
31
|
+
keepalive: nil, # (暂不支持复用)
|
32
32
|
file: nil, # 要上传的文件路径
|
33
|
-
|
33
|
+
path: nil, # 请求路径,在流水线方式请求时有用(暂不支持)
|
34
34
|
query: nil, # 查询字符串,可以是string或hash类型
|
35
35
|
body: nil, # 请求体,可以是string或hash类型
|
36
36
|
head: nil, # 请求头
|
@@ -71,9 +71,9 @@ class TaskStruct
|
|
71
71
|
|
72
72
|
@request_options = {
|
73
73
|
redirects: redirects,
|
74
|
-
|
74
|
+
keepalive: keepalive,
|
75
75
|
file: file,
|
76
|
-
|
76
|
+
path: path,
|
77
77
|
query: query,
|
78
78
|
body: body,
|
79
79
|
head: head
|
@@ -155,20 +155,20 @@ module ListSpider
|
|
155
155
|
|
156
156
|
if task_struct.errback
|
157
157
|
task_struct.errback.call(task_struct, http_req)
|
158
|
-
else
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
158
|
+
# else
|
159
|
+
# ret = false
|
160
|
+
# if task_struct.http_method == :get
|
161
|
+
# ret = SpiderHelper.direct_http_get(task_struct.href, task_struct.local_path, convert_to_utf8: @convert_to_utf8)
|
162
|
+
# elsif task_struct.http_method == :post
|
163
|
+
# ret = SpiderHelper.direct_http_post(task_struct.href, task_struct.local_path, task_struct.params, convert_to_utf8: @convert_to_utf8)
|
164
|
+
# end
|
165
|
+
|
166
|
+
# if ret
|
167
|
+
# call_parse_method(task_struct)
|
168
|
+
# succeed_list << task_struct
|
169
|
+
# else
|
170
|
+
# failed_list << task_struct
|
171
|
+
# end
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
data/lib/list_spider/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: list_spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Charles Zhang
|
@@ -108,6 +108,7 @@ files:
|
|
108
108
|
- ".gitignore"
|
109
109
|
- ".rubocop.yml"
|
110
110
|
- Gemfile
|
111
|
+
- Gemfile.lock
|
111
112
|
- README.md
|
112
113
|
- Rakefile
|
113
114
|
- bin/console
|