list_spider 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 837d9e4cb2b3aa829466cf9eaa4f48a24b5d4ff5067bbc27fb67fbdb37eec291
4
- data.tar.gz: 8d378b9e3240b8d9c3bdc9c7e32aceb39a16fc63310224dc7ce6a68a2c570893
3
+ metadata.gz: 9ef1d9cd5edd321718137a9b9055b6e223fb870f1209271df4142930c35dd6f5
4
+ data.tar.gz: 983055db2e6714337f4f76ec3d279a83f46d48e689be0334c9a944bce197599c
5
5
  SHA512:
6
- metadata.gz: dd2c77aa71d8ff3d7ecba93fc6e30ec158b479dcffed9e3cc744944e2bcea3cb5425fc59f85acc22573bcbe3d1eb9a0967e7d0b1e11d3c9cb8d04a58450a0a7e
7
- data.tar.gz: ec0e3ac5b2a09a3986eea20c69efc31c9536d1d96f77507e50755bfa07531c4bf7303317bc657a573dd8347bd304d8e93c9adbabf868918f0bdbe56c480e82e6
6
+ metadata.gz: dcccc6c7761931c93bd729e58a844e127d9ac7a63075f8163c92083f7749df2e978cc745071a2c319bcc48825d2dd53a2e043e97a02329884168cc50d21f7975
7
+ data.tar.gz: 33cd0806b64df7e88296a9ff8ce0a3b233f6702df9cdb4c87938c7cc0b64cde2d096970903a3242c228c308d7fc2d8a35f4eff85d2262e5034de83bf7d009e85
data/Gemfile.lock ADDED
@@ -0,0 +1,41 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ list_spider (2.0.0)
5
+ em-http-request (~> 1.1, >= 1.1.3)
6
+ nokogiri (~> 1.6, >= 1.6.7)
7
+ rchardet (~> 1.6, >= 1.6.1)
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ addressable (2.5.2)
13
+ public_suffix (>= 2.0.2, < 4.0)
14
+ cookiejar (0.3.3)
15
+ em-http-request (1.1.5)
16
+ addressable (>= 2.3.4)
17
+ cookiejar (!= 0.3.1)
18
+ em-socksify (>= 0.3)
19
+ eventmachine (>= 1.0.3)
20
+ http_parser.rb (>= 0.6.0)
21
+ em-socksify (0.3.2)
22
+ eventmachine (>= 1.0.0.beta.4)
23
+ eventmachine (1.2.5)
24
+ http_parser.rb (0.6.0)
25
+ mini_portile2 (2.3.0)
26
+ nokogiri (1.8.2)
27
+ mini_portile2 (~> 2.3.0)
28
+ public_suffix (3.0.2)
29
+ rake (10.5.0)
30
+ rchardet (1.7.0)
31
+
32
+ PLATFORMS
33
+ ruby
34
+
35
+ DEPENDENCIES
36
+ bundler (~> 1.16)
37
+ list_spider!
38
+ rake (~> 10.0)
39
+
40
+ BUNDLED WITH
41
+ 1.16.1
data/lib/list_spider.rb CHANGED
@@ -14,7 +14,7 @@ class TaskStruct
14
14
  http_method: :get,
15
15
  custom_data: nil, # 自定义数据
16
16
  parse_method: nil, # 解析保存文件的回调,参数是TaskStruct对象本身
17
- # 请求成功后的回调,此时可能没有保存文件,比如301,
17
+ # 请求成功后的回调,此时可能没有保存文件,比如301,404
18
18
  # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
19
19
  # http.response_header.status 状态码
20
20
  # http.response_header 返回头
@@ -28,9 +28,9 @@ class TaskStruct
28
28
  overwrite_exist: false, # 是否覆盖现有文件
29
29
  # request options
30
30
  redirects: 3, # 重定向次数
31
- # keepalive: nil, # (暂不支持)
31
+ keepalive: nil, # (暂不支持复用)
32
32
  file: nil, # 要上传的文件路径
33
- # path: nil, # 请求路径,在流水线方式请求时有用(暂不支持)
33
+ path: nil, # 请求路径,在流水线方式请求时有用(暂不支持)
34
34
  query: nil, # 查询字符串,可以是string或hash类型
35
35
  body: nil, # 请求体,可以是string或hash类型
36
36
  head: nil, # 请求头
@@ -71,9 +71,9 @@ class TaskStruct
71
71
 
72
72
  @request_options = {
73
73
  redirects: redirects,
74
- # keepalive: keepalive,
74
+ keepalive: keepalive,
75
75
  file: file,
76
- # path: path,
76
+ path: path,
77
77
  query: query,
78
78
  body: body,
79
79
  head: head
@@ -155,20 +155,20 @@ module ListSpider
155
155
 
156
156
  if task_struct.errback
157
157
  task_struct.errback.call(task_struct, http_req)
158
- else
159
- ret = false
160
- if task_struct.http_method == :get
161
- ret = SpiderHelper.direct_http_get(task_struct.href, task_struct.local_path, convert_to_utf8: @convert_to_utf8)
162
- elsif task_struct.http_method == :post
163
- ret = SpiderHelper.direct_http_post(task_struct.href, task_struct.local_path, task_struct.params, convert_to_utf8: @convert_to_utf8)
164
- end
165
-
166
- if ret
167
- call_parse_method(task_struct)
168
- succeed_list << task_struct
169
- else
170
- failed_list << task_struct
171
- end
158
+ # else
159
+ # ret = false
160
+ # if task_struct.http_method == :get
161
+ # ret = SpiderHelper.direct_http_get(task_struct.href, task_struct.local_path, convert_to_utf8: @convert_to_utf8)
162
+ # elsif task_struct.http_method == :post
163
+ # ret = SpiderHelper.direct_http_post(task_struct.href, task_struct.local_path, task_struct.params, convert_to_utf8: @convert_to_utf8)
164
+ # end
165
+
166
+ # if ret
167
+ # call_parse_method(task_struct)
168
+ # succeed_list << task_struct
169
+ # else
170
+ # failed_list << task_struct
171
+ # end
172
172
  end
173
173
  end
174
174
 
@@ -1,3 +1,3 @@
1
1
  module ListSpider
2
- VERSION = '2.0.0'.freeze
2
+ VERSION = '2.0.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Charles Zhang
@@ -108,6 +108,7 @@ files:
108
108
  - ".gitignore"
109
109
  - ".rubocop.yml"
110
110
  - Gemfile
111
+ - Gemfile.lock
111
112
  - README.md
112
113
  - Rakefile
113
114
  - bin/console