list_spider 0.3.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +84 -0
- data/.rubocop.yml +48 -0
- data/Gemfile +6 -0
- data/README.md +186 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/check_code.sh +3 -0
- data/lib/list_spider.rb +6 -5
- data/lib/list_spider/version.rb +3 -0
- data/lib/spider_helper.rb +2 -2
- data/list_spider.gemspec +31 -0
- data/spider_example.rb +29 -0
- data/spider_example_2.rb +29 -0
- metadata +46 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 197035f7521ba4c326c0181c7133afe4c5d7bacfc3246795dc32758dce40da64
|
4
|
+
data.tar.gz: 89d14776f4c041806b6b9e164b31e651d03746c74d83505d5a32c1aeeaa62aa2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1b38832345203ec036ff4f8e11fba1d92e8ec58674d05ef129784a9e274dcd03ef421fa3db6e38bc38d7bb1cf3c54b7d56cbb321a5340bbe197fe57099ed077
|
7
|
+
data.tar.gz: 43de7e093004c823abb3c51a053869fd294af7fee9f9724c499af572ead7d5ba79d7ab9bb16b2baae1e00a1d198f89fcfbbedc35f57a3a8ed00f7f785d40cbfc
|
data/.gitignore
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
## Specific to RubyMotion:
|
14
|
+
.dat*
|
15
|
+
.repl_history
|
16
|
+
build/
|
17
|
+
|
18
|
+
## Documentation cache and generated files:
|
19
|
+
/.yardoc/
|
20
|
+
/_yardoc/
|
21
|
+
/doc/
|
22
|
+
/rdoc/
|
23
|
+
|
24
|
+
## Environment normalisation:
|
25
|
+
/.bundle/
|
26
|
+
/vendor/bundle
|
27
|
+
/lib/bundler/man/
|
28
|
+
|
29
|
+
# for a library or gem, you might want to ignore these files since the code is
|
30
|
+
# intended to run in multiple environments; otherwise, check them in:
|
31
|
+
# Gemfile.lock
|
32
|
+
# .ruby-version
|
33
|
+
# .ruby-gemset
|
34
|
+
|
35
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
36
|
+
.rvmrc
|
37
|
+
|
38
|
+
.DS_Store
|
39
|
+
.AppleDouble
|
40
|
+
.LSOverride
|
41
|
+
|
42
|
+
# Icon must end with two \r
|
43
|
+
Icon
|
44
|
+
|
45
|
+
|
46
|
+
# Thumbnails
|
47
|
+
._*
|
48
|
+
|
49
|
+
# Files that might appear in the root of a volume
|
50
|
+
.DocumentRevisions-V100
|
51
|
+
.fseventsd
|
52
|
+
.Spotlight-V100
|
53
|
+
.TemporaryItems
|
54
|
+
.Trashes
|
55
|
+
.VolumeIcon.icns
|
56
|
+
|
57
|
+
# Directories potentially created on remote AFP share
|
58
|
+
.AppleDB
|
59
|
+
.AppleDesktop
|
60
|
+
Network Trash Folder
|
61
|
+
Temporary Items
|
62
|
+
.apdisk
|
63
|
+
|
64
|
+
# Windows image file caches
|
65
|
+
Thumbs.db
|
66
|
+
ehthumbs.db
|
67
|
+
|
68
|
+
# Folder config file
|
69
|
+
Desktop.ini
|
70
|
+
|
71
|
+
# Recycle Bin used on file shares
|
72
|
+
$RECYCLE.BIN/
|
73
|
+
|
74
|
+
# Windows Installer files
|
75
|
+
*.cab
|
76
|
+
*.msi
|
77
|
+
*.msm
|
78
|
+
*.msp
|
79
|
+
|
80
|
+
# Windows shortcuts
|
81
|
+
*.lnk
|
82
|
+
|
83
|
+
rubocopresult
|
84
|
+
coolshell
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
Metrics/LineLength:
|
2
|
+
Max: 120
|
3
|
+
Metrics/MethodLength:
|
4
|
+
Max: 50
|
5
|
+
Metrics/ParameterLists:
|
6
|
+
Max: 12
|
7
|
+
Metrics/AbcSize:
|
8
|
+
Max: 50
|
9
|
+
Metrics/CyclomaticComplexity:
|
10
|
+
Max: 10
|
11
|
+
Metrics/PerceivedComplexity:
|
12
|
+
Max: 10
|
13
|
+
Style/GuardClause:
|
14
|
+
MinBodyLength: 5
|
15
|
+
Style/AsciiComments:
|
16
|
+
Enabled: false
|
17
|
+
Style/Documentation:
|
18
|
+
Enabled: false
|
19
|
+
Lint/AmbiguousRegexpLiteral:
|
20
|
+
Enabled: false
|
21
|
+
Lint/DefEndAlignment:
|
22
|
+
AutoCorrect: true
|
23
|
+
Lint/EndAlignment:
|
24
|
+
AutoCorrect: true
|
25
|
+
Style/BracesAroundHashParameters:
|
26
|
+
Enabled: false
|
27
|
+
Style/ClassAndModuleChildren:
|
28
|
+
Enabled: false
|
29
|
+
Style/AutoResourceCleanup:
|
30
|
+
Enabled: true
|
31
|
+
Style/CollectionMethods:
|
32
|
+
Enabled: true
|
33
|
+
Style/Encoding:
|
34
|
+
Enabled: true
|
35
|
+
Style/MethodCalledOnDoEndBlock:
|
36
|
+
Enabled: true
|
37
|
+
Layout/MultilineAssignmentLayout:
|
38
|
+
Enabled: true
|
39
|
+
Style/OptionHash:
|
40
|
+
Enabled: true
|
41
|
+
Style/StringMethods:
|
42
|
+
Enabled: true
|
43
|
+
Style/SymbolArray:
|
44
|
+
Enabled: true
|
45
|
+
Style/NonNilCheck:
|
46
|
+
IncludeSemanticChanges: true
|
47
|
+
Style/Send:
|
48
|
+
Enabled: true
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,186 @@
|
|
1
|
+
# list_spider
|
2
|
+
|
3
|
+
A url list spider based on em-http-request.
|
4
|
+
|
5
|
+
Often we only need to crawl a list of URLs, parse the downloaded pages, and then crawl again. This gem is built for that purpose.
|
6
|
+
|
7
|
+
## Features
|
8
|
+
* Duplicate URL filtering (based on local path, so you can customize the behavior).
|
9
|
+
|
10
|
+
* Convert to UTF-8 support.
|
11
|
+
|
12
|
+
* Incremental crawling support (files that already exist are not downloaded again).
|
13
|
+
|
14
|
+
* Customizable concurrency limit and interval between tasks.
|
15
|
+
|
16
|
+
* HTTP options support.
|
17
|
+
|
18
|
+
## Getting started
|
19
|
+
|
20
|
+
gem install list_spider
|
21
|
+
|
22
|
+
## Use like this
|
23
|
+
```ruby
|
24
|
+
require 'list_spider'
|
25
|
+
|
26
|
+
DOWNLOAD_DIR = 'coolshell/'
|
27
|
+
|
28
|
+
$next_list = []
|
29
|
+
|
30
|
+
def parse_index_item(file_name)
|
31
|
+
content = File.read(file_name)
|
32
|
+
doc = Nokogiri::HTML(content)
|
33
|
+
list_group = doc.css("h2.entry-title")
|
34
|
+
link_list = list_group.css("a")
|
35
|
+
|
36
|
+
link_list.each do |link|
|
37
|
+
href = link['href']
|
38
|
+
local_path = DOWNLOAD_DIR + link.content + ".html"
|
39
|
+
#or you can save them to database for later use
|
40
|
+
$next_list<< TaskStruct.new(href, local_path)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
task_list = []
|
45
|
+
task_list << TaskStruct.new('https://coolshell.cn/', DOWNLOAD_DIR + 'index.html', parse_method: method(:parse_index_item))
|
46
|
+
|
47
|
+
ListSpider.get_list(task_list)
|
48
|
+
ListSpider.get_list($next_list, max: 60)
|
49
|
+
|
50
|
+
```
|
51
|
+
|
52
|
+
## Or in one step
|
53
|
+
```ruby
|
54
|
+
require 'list_spider'
|
55
|
+
|
56
|
+
DOWNLOAD_DIR = 'coolshell/'
|
57
|
+
|
58
|
+
def parse_index_item(file_name)
|
59
|
+
|
60
|
+
content = File.read(file_name)
|
61
|
+
doc = Nokogiri::HTML(content)
|
62
|
+
list_group = doc.css("h2.entry-title")
|
63
|
+
link_list = list_group.css("a")
|
64
|
+
|
65
|
+
link_list.each do |link|
|
66
|
+
href = link['href']
|
67
|
+
local_path = DOWNLOAD_DIR + link.content + ".html"
|
68
|
+
ListSpider.add_task(TaskStruct.new(href, local_path))
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
#get_one is a simple function for one taskstruct situation
|
73
|
+
ListSpider.get_one(TaskStruct.new(
|
74
|
+
'https://coolshell.cn/',
|
75
|
+
DOWNLOAD_DIR + 'index.html',
|
76
|
+
parse_method: method(:parse_index_item)),
|
77
|
+
max: 60)
|
78
|
+
|
79
|
+
```
|
80
|
+
|
81
|
+
## You can define parse method in four forms
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
def parse_response(file_name)
|
85
|
+
#...
|
86
|
+
end
|
87
|
+
|
88
|
+
|
89
|
+
# extra_data is passed by TaskStruct's extra_data param
|
90
|
+
|
91
|
+
def parse_response(file_name, extra_data)
|
92
|
+
#...
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
# response_header is a EventMachine::HttpResponseHeader object
|
97
|
+
# you can use it like this:
|
98
|
+
# response_header.status
|
99
|
+
# response_header.cookie
|
100
|
+
# response_header['Last-Modified']
|
101
|
+
|
102
|
+
def parse_response(file_name, extra_data, response_header)
|
103
|
+
response_header.status
|
104
|
+
response_header['Last-Modified']
|
105
|
+
|
106
|
+
#...
|
107
|
+
end
|
108
|
+
|
109
|
+
# req is a EventMachine::HttpClientOptions object
|
110
|
+
# you can use it like this:
|
111
|
+
# req.body
|
112
|
+
# req.headers
|
113
|
+
# req.uri
|
114
|
+
# req.host
|
115
|
+
# req.port
|
116
|
+
def parse_response(file_name, extra_data, response_header, req)
|
117
|
+
puts req.body
|
118
|
+
puts req.headers
|
119
|
+
puts req.uri
|
120
|
+
puts req.host
|
121
|
+
puts req.port
|
122
|
+
|
123
|
+
#...
|
124
|
+
end
|
125
|
+
|
126
|
+
```
|
127
|
+
|
128
|
+
## And there are many options you can use
|
129
|
+
|
130
|
+
```ruby
|
131
|
+
TaskStruct.new(href, local_path, http_method: :get, params: {}, extra_data: nil, parse_method: nil, header: nil)
|
132
|
+
```
|
133
|
+
|
134
|
+
```ruby
|
135
|
+
#no concurrent limit (note: only use when list size is small)
|
136
|
+
ListSpider.get_list(down_list, interval: 0, max: ListSpider::NO_LIMIT_CONCURRENT)
|
137
|
+
|
138
|
+
#sleep a random time between tasks, often used for sites that rate-limit spiders
|
139
|
+
ListSpider.get_list(down_list, interval: ListSpider::RANDOM_TIME, max: 1)
|
140
|
+
|
141
|
+
#set random time range
|
142
|
+
ListSpider.get_list(down_list, interval: (1..10), max: 1)
|
143
|
+
|
144
|
+
```
|
145
|
+
|
146
|
+
### The options below take effect for the whole program (set them before calling get_list)
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
#set proxy
|
150
|
+
ListSpider.set_proxy(proxy_addr, proxy_port, username: nil, password: nil)
|
151
|
+
|
152
|
+
#set http header (a header set on a TaskStruct takes priority over this one)
|
153
|
+
ListSpider.set_header_option(header_option)
|
154
|
+
|
155
|
+
#convert the file encoding to utf-8
|
156
|
+
ListSpider.convert_to_utf8 = true
|
157
|
+
|
158
|
+
#set connect timeout
|
159
|
+
ListSpider.connect_timeout = 2*60
|
160
|
+
|
161
|
+
#overwrite existing files
|
162
|
+
ListSpider.overwrite_exist = false
|
163
|
+
|
164
|
+
#set redirect depth
|
165
|
+
ListSpider.max_redirects = 10
|
166
|
+
|
167
|
+
```
|
168
|
+
|
169
|
+
## There is a utility class to help check or delete invalid files
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
FileFilter.delete(CustomConfig::DIR + '*', size_threshold: 300)
|
173
|
+
|
174
|
+
FileFilter.check(CustomConfig::DIR + '*', size_threshold: 300)
|
175
|
+
|
176
|
+
FileFilter.check_save_result(CustomConfig::DIR + '*', size_threshold: 300)
|
177
|
+
|
178
|
+
#params
|
179
|
+
FileFilter.delete(dir_pattern, size_threshold: 1000, cust_judge: nil)
|
180
|
+
|
181
|
+
FileFilter.check_save_result(dir_pattern, save_file_name: 'filtered_file.txt', size_threshold: 1000, cust_judge: nil)
|
182
|
+
```
|
183
|
+
|
184
|
+
### License
|
185
|
+
|
186
|
+
(MIT License) - Copyright (c) 2016 Charles Zhang
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'list_spider'
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require 'irb'
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/check_code.sh
ADDED
data/lib/list_spider.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'list_spider/version'
|
1
2
|
require 'em-http-request'
|
2
3
|
require 'nokogiri'
|
3
4
|
require 'fileutils'
|
@@ -108,7 +109,7 @@ module ListSpider
|
|
108
109
|
end
|
109
110
|
end
|
110
111
|
succeed_list << e
|
111
|
-
rescue => e
|
112
|
+
rescue StandardError => e
|
112
113
|
puts e
|
113
114
|
end
|
114
115
|
end
|
@@ -122,7 +123,7 @@ module ListSpider
|
|
122
123
|
if e.http_method == :get
|
123
124
|
ret = SpiderHelper.direct_http_get(e.href, e.local_path, convert_to_utf8: @convert_to_utf8)
|
124
125
|
elsif e.http_method == :post
|
125
|
-
|
126
|
+
ret = SpiderHelper.direct_http_post(e.href, e.local_path, e.params, convert_to_utf8: @convert_to_utf8)
|
126
127
|
end
|
127
128
|
|
128
129
|
if ret
|
@@ -134,7 +135,7 @@ module ListSpider
|
|
134
135
|
|
135
136
|
begin
|
136
137
|
multi.add e.local_path, w
|
137
|
-
rescue => exception
|
138
|
+
rescue StandardError => exception
|
138
139
|
puts exception
|
139
140
|
puts e.href
|
140
141
|
puts e.local_path
|
@@ -248,7 +249,7 @@ module ListSpider
|
|
248
249
|
end
|
249
250
|
|
250
251
|
def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
|
251
|
-
if interval.is_a?Range
|
252
|
+
if interval.is_a? Range
|
252
253
|
@random_time_range = interval
|
253
254
|
interval = RANDOM_TIME
|
254
255
|
end
|
@@ -273,7 +274,7 @@ module ListSpider
|
|
273
274
|
end
|
274
275
|
|
275
276
|
def add_task(task)
|
276
|
-
if task.is_a?Array
|
277
|
+
if task.is_a? Array
|
277
278
|
need_down_list = filter_list(task)
|
278
279
|
@down_list += need_down_list
|
279
280
|
elsif task.is_a?TaskStruct
|
data/lib/spider_helper.rb
CHANGED
@@ -27,7 +27,7 @@ module SpiderHelper
|
|
27
27
|
else
|
28
28
|
puts res
|
29
29
|
end
|
30
|
-
rescue => e
|
30
|
+
rescue StandardError => e
|
31
31
|
puts e.backtrace
|
32
32
|
puts e
|
33
33
|
false
|
@@ -59,7 +59,7 @@ module SpiderHelper
|
|
59
59
|
else
|
60
60
|
puts res
|
61
61
|
end
|
62
|
-
rescue => e
|
62
|
+
rescue StandardError => e
|
63
63
|
puts e
|
64
64
|
false
|
65
65
|
end
|
data/list_spider.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'list_spider/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = 'list_spider'
|
8
|
+
spec.version = ListSpider::VERSION
|
9
|
+
spec.authors = ['Charles Zhang']
|
10
|
+
spec.email = ['gis05zc@163.com']
|
11
|
+
|
12
|
+
spec.summary = 'List Spider'
|
13
|
+
spec.description = 'A url list spider based on em-http-request.'
|
14
|
+
spec.homepage = 'https://github.com/chinazhangchao/list_spider'
|
15
|
+
spec.license = 'MIT'
|
16
|
+
|
17
|
+
spec.files =
|
18
|
+
`git ls-files -z`.split("\x0").reject do |f|
|
19
|
+
f.match(%r{^(test|spec|features)/})
|
20
|
+
end
|
21
|
+
spec.bindir = 'exe'
|
22
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
23
|
+
spec.require_paths = ['lib']
|
24
|
+
|
25
|
+
spec.add_development_dependency 'bundler', '~> 1.16'
|
26
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
27
|
+
|
28
|
+
spec.add_dependency 'em-http-request', '~> 1.1', '>= 1.1.3'
|
29
|
+
spec.add_dependency 'nokogiri', '~> 1.6', '>= 1.6.7'
|
30
|
+
spec.add_dependency 'rchardet', '~> 1.6', '>= 1.6.1'
|
31
|
+
end
|
data/spider_example.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'list_spider'
|
2
|
+
# require File.expand_path('../lib/list_spider', __FILE__)
|
3
|
+
|
4
|
+
DOWNLOAD_DIR = 'coolshell/'.freeze
|
5
|
+
|
6
|
+
def parse_index_item(file_name)
|
7
|
+
content = File.read(file_name)
|
8
|
+
doc = Nokogiri::HTML(content)
|
9
|
+
list_group = doc.css('h2.entry-title')
|
10
|
+
link_list = list_group.css('a')
|
11
|
+
|
12
|
+
link_list.each do |link|
|
13
|
+
href = link['href']
|
14
|
+
local_path = DOWNLOAD_DIR + link.content + '.html'
|
15
|
+
ListSpider.add_task(TaskStruct.new(href, local_path))
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# ListSpider.convert_to_utf8 = true
|
20
|
+
|
21
|
+
# get_one is a simple function for one taskstruct situation
|
22
|
+
ListSpider.get_one(
|
23
|
+
TaskStruct.new(
|
24
|
+
'https://coolshell.cn/',
|
25
|
+
DOWNLOAD_DIR + 'index.html',
|
26
|
+
parse_method: method(:parse_index_item)
|
27
|
+
),
|
28
|
+
max: 60
|
29
|
+
)
|
data/spider_example_2.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'list_spider'
|
2
|
+
|
3
|
+
DOWNLOAD_DIR = 'coolshell/'.freeze
|
4
|
+
|
5
|
+
@next_list = []
|
6
|
+
|
7
|
+
def parse_index_item(file_name)
|
8
|
+
content = File.read(file_name)
|
9
|
+
doc = Nokogiri::HTML(content)
|
10
|
+
list_group = doc.css('h2.entry-title')
|
11
|
+
link_list = list_group.css('a')
|
12
|
+
|
13
|
+
link_list.each do |link|
|
14
|
+
href = link['href']
|
15
|
+
local_path = DOWNLOAD_DIR + link.content + '.html'
|
16
|
+
# or you can save them to database for later use
|
17
|
+
@next_list << TaskStruct.new(href, local_path)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
task_list = []
|
22
|
+
task_list << TaskStruct.new(
|
23
|
+
'https://coolshell.cn/',
|
24
|
+
DOWNLOAD_DIR + 'index.html',
|
25
|
+
parse_method: method(:parse_index_item)
|
26
|
+
)
|
27
|
+
|
28
|
+
ListSpider.get_list(task_list)
|
29
|
+
ListSpider.get_list(@next_list, max: 60)
|
metadata
CHANGED
@@ -1,15 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: list_spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.6
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Charles Zhang
|
8
8
|
autorequire:
|
9
|
-
bindir:
|
9
|
+
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.16'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.16'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
13
41
|
- !ruby/object:Gem::Dependency
|
14
42
|
name: em-http-request
|
15
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,14 +99,27 @@ dependencies:
|
|
71
99
|
- !ruby/object:Gem::Version
|
72
100
|
version: 1.6.1
|
73
101
|
description: A url list spider based on em-http-request.
|
74
|
-
email:
|
102
|
+
email:
|
103
|
+
- gis05zc@163.com
|
75
104
|
executables: []
|
76
105
|
extensions: []
|
77
106
|
extra_rdoc_files: []
|
78
107
|
files:
|
108
|
+
- ".gitignore"
|
109
|
+
- ".rubocop.yml"
|
110
|
+
- Gemfile
|
111
|
+
- README.md
|
112
|
+
- Rakefile
|
113
|
+
- bin/console
|
114
|
+
- bin/setup
|
115
|
+
- check_code.sh
|
79
116
|
- lib/file_filter.rb
|
80
117
|
- lib/list_spider.rb
|
118
|
+
- lib/list_spider/version.rb
|
81
119
|
- lib/spider_helper.rb
|
120
|
+
- list_spider.gemspec
|
121
|
+
- spider_example.rb
|
122
|
+
- spider_example_2.rb
|
82
123
|
homepage: https://github.com/chinazhangchao/list_spider
|
83
124
|
licenses:
|
84
125
|
- MIT
|
@@ -99,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
140
|
version: '0'
|
100
141
|
requirements: []
|
101
142
|
rubyforge_project:
|
102
|
-
rubygems_version: 2.
|
143
|
+
rubygems_version: 2.7.3
|
103
144
|
signing_key:
|
104
145
|
specification_version: 4
|
105
146
|
summary: List Spider
|