list_spider 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +41 -0
- data/lib/list_spider.rb +19 -19
- data/lib/list_spider/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ef1d9cd5edd321718137a9b9055b6e223fb870f1209271df4142930c35dd6f5
|
4
|
+
data.tar.gz: 983055db2e6714337f4f76ec3d279a83f46d48e689be0334c9a944bce197599c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcccc6c7761931c93bd729e58a844e127d9ac7a63075f8163c92083f7749df2e978cc745071a2c319bcc48825d2dd53a2e043e97a02329884168cc50d21f7975
|
7
|
+
data.tar.gz: 33cd0806b64df7e88296a9ff8ce0a3b233f6702df9cdb4c87938c7cc0b64cde2d096970903a3242c228c308d7fc2d8a35f4eff85d2262e5034de83bf7d009e85
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
list_spider (2.0.0)
|
5
|
+
em-http-request (~> 1.1, >= 1.1.3)
|
6
|
+
nokogiri (~> 1.6, >= 1.6.7)
|
7
|
+
rchardet (~> 1.6, >= 1.6.1)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
addressable (2.5.2)
|
13
|
+
public_suffix (>= 2.0.2, < 4.0)
|
14
|
+
cookiejar (0.3.3)
|
15
|
+
em-http-request (1.1.5)
|
16
|
+
addressable (>= 2.3.4)
|
17
|
+
cookiejar (!= 0.3.1)
|
18
|
+
em-socksify (>= 0.3)
|
19
|
+
eventmachine (>= 1.0.3)
|
20
|
+
http_parser.rb (>= 0.6.0)
|
21
|
+
em-socksify (0.3.2)
|
22
|
+
eventmachine (>= 1.0.0.beta.4)
|
23
|
+
eventmachine (1.2.5)
|
24
|
+
http_parser.rb (0.6.0)
|
25
|
+
mini_portile2 (2.3.0)
|
26
|
+
nokogiri (1.8.2)
|
27
|
+
mini_portile2 (~> 2.3.0)
|
28
|
+
public_suffix (3.0.2)
|
29
|
+
rake (10.5.0)
|
30
|
+
rchardet (1.7.0)
|
31
|
+
|
32
|
+
PLATFORMS
|
33
|
+
ruby
|
34
|
+
|
35
|
+
DEPENDENCIES
|
36
|
+
bundler (~> 1.16)
|
37
|
+
list_spider!
|
38
|
+
rake (~> 10.0)
|
39
|
+
|
40
|
+
BUNDLED WITH
|
41
|
+
1.16.1
|
data/lib/list_spider.rb
CHANGED
@@ -14,7 +14,7 @@ class TaskStruct
|
|
14
14
|
http_method: :get,
|
15
15
|
custom_data: nil, # 自定义数据
|
16
16
|
parse_method: nil, # 解析保存文件的回调,参数是TaskStruct对象本身
|
17
|
-
# 请求成功后的回调,此时可能没有保存文件,比如301,
|
17
|
+
# 请求成功后的回调,此时可能没有保存文件,比如301,404
|
18
18
|
# 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
|
19
19
|
# http.response_header.status 状态码
|
20
20
|
# http.response_header 返回头
|
@@ -28,9 +28,9 @@ class TaskStruct
|
|
28
28
|
overwrite_exist: false, # 是否覆盖现有文件
|
29
29
|
# request options
|
30
30
|
redirects: 3, # 重定向次数
|
31
|
-
|
31
|
+
keepalive: nil, # (暂不支持复用)
|
32
32
|
file: nil, # 要上传的文件路径
|
33
|
-
|
33
|
+
path: nil, # 请求路径,在流水线方式请求时有用(暂不支持)
|
34
34
|
query: nil, # 查询字符串,可以是string或hash类型
|
35
35
|
body: nil, # 请求体,可以是string或hash类型
|
36
36
|
head: nil, # 请求头
|
@@ -71,9 +71,9 @@ class TaskStruct
|
|
71
71
|
|
72
72
|
@request_options = {
|
73
73
|
redirects: redirects,
|
74
|
-
|
74
|
+
keepalive: keepalive,
|
75
75
|
file: file,
|
76
|
-
|
76
|
+
path: path,
|
77
77
|
query: query,
|
78
78
|
body: body,
|
79
79
|
head: head
|
@@ -155,20 +155,20 @@ module ListSpider
|
|
155
155
|
|
156
156
|
if task_struct.errback
|
157
157
|
task_struct.errback.call(task_struct, http_req)
|
158
|
-
else
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
158
|
+
# else
|
159
|
+
# ret = false
|
160
|
+
# if task_struct.http_method == :get
|
161
|
+
# ret = SpiderHelper.direct_http_get(task_struct.href, task_struct.local_path, convert_to_utf8: @convert_to_utf8)
|
162
|
+
# elsif task_struct.http_method == :post
|
163
|
+
# ret = SpiderHelper.direct_http_post(task_struct.href, task_struct.local_path, task_struct.params, convert_to_utf8: @convert_to_utf8)
|
164
|
+
# end
|
165
|
+
|
166
|
+
# if ret
|
167
|
+
# call_parse_method(task_struct)
|
168
|
+
# succeed_list << task_struct
|
169
|
+
# else
|
170
|
+
# failed_list << task_struct
|
171
|
+
# end
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
data/lib/list_spider/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: list_spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Charles Zhang
|
@@ -108,6 +108,7 @@ files:
|
|
108
108
|
- ".gitignore"
|
109
109
|
- ".rubocop.yml"
|
110
110
|
- Gemfile
|
111
|
+
- Gemfile.lock
|
111
112
|
- README.md
|
112
113
|
- Rakefile
|
113
114
|
- bin/console
|