list_spider 2.0.2 → 2.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 4a69a63919c971b077855f236631b5b91895e11836338c8970dec65214f92a4c
-   data.tar.gz: 6e0fefdd9a9eba8bfbead303bfbe2e27a0d98171bd60cdae89864f026289c987
+   metadata.gz: 2af55a6c3577dc734aa7ee545cef217059abfc7be4724eaac9cf94126b869b0e
+   data.tar.gz: 48e8f116b91e36613b05958f173a1bc168c0c6daa163fd137266515c3a19c2b7
  SHA512:
-   metadata.gz: 11a2591f94021e7bb3d06bb7a712f98cf8329142150cf09979d44cc65f1803c809e9077d85ad5d41c83457b20c2bf9694a3a65692898145eb7738e18460e40ba
-   data.tar.gz: 3e393c808733042ba3b13a950fcc63f7498641a55d16a5ccae11f3bf1100ca04558e32873bd9063602abd6c72194e8d90492b9b292e47cbd08bfc44476f5417f
+   metadata.gz: 778ae0918059fd2edea3a02081cf479d054521f216afa789f4d9131708b8339486f39b9f7603e303de91f4c03b1bb7ebf30e6b45ac0921fe0c29640743df9e5d
+   data.tar.gz: bcfc6df857085630faf802f3cff9d21653c2d8ced9b2595a3bc92a8093d8883cd470132b770801bc2e4977ad17e5f82aea726a2ad600ab5d1560150dede7c20f
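These digests cover `metadata.gz` and `data.tar.gz`, the two entries inside the packaged `.gem` tar archive. A minimal sketch for recomputing the SHA256 values from a local copy of the release (the local file name is an assumption):

```ruby
require 'digest'
require 'rubygems/package'

# Hypothetical local copy of the 2.1.0 release.
gem_path = 'list_spider-2.1.0.gem'

# A .gem file is a plain tar archive; metadata.gz and data.tar.gz are
# entries inside it, each hashed individually in checksums.yaml.
File.open(gem_path, 'rb') do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
    puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
  end
end
```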
.rdoc_options ADDED
@@ -0,0 +1,23 @@
+ --- !ruby/object:RDoc::Options
+ encoding: UTF-8
+ static_path: []
+ rdoc_include:
+ - "."
+ - "/Users/zhangchao/github/list_spider"
+ charset: UTF-8
+ exclude:
+ hyperlink_all: false
+ line_numbers: false
+ locale:
+ locale_dir: locale
+ locale_name:
+ main_page:
+ markup: markdown
+ output_decoration: true
+ page_dir:
+ show_hash: false
+ tab_width: 8
+ template_stylesheets: []
+ title:
+ visibility: :protected
+ webcvs:
.rubocop.yml CHANGED
@@ -18,9 +18,9 @@ Style/Documentation:
    Enabled: false
  Lint/AmbiguousRegexpLiteral:
    Enabled: false
- Lint/DefEndAlignment:
+ Layout/DefEndAlignment:
    AutoCorrect: true
- Lint/EndAlignment:
+ Layout/EndAlignment:
    AutoCorrect: true
  Style/BracesAroundHashParameters:
    Enabled: false
English_README.md ADDED
@@ -0,0 +1,169 @@
+ # list_spider
+
+ A URL-list spider based on em-http-request.
+
+ Often we only need to crawl a list of URLs, parse the results, and then crawl the newly found links. list_spider is built for that purpose.
+
+ ## Features
+ * Duplicate URL filtering (based on the local save path, so you can customize the behavior).
+
+ * Optional conversion of downloaded content to UTF-8.
+
+ * Incremental crawling (files that already exist are not fetched again).
+
+ * Configurable concurrency limit and interval between tasks.
+
+ * Full HTTP options support.
+
+ ## Getting started
+
+ ```ruby
+ gem install list_spider
+ ```
+
+ Or add it to your Gemfile
+
+ ```ruby
+ gem 'list_spider'
+ ```
+
+ ## Use like this
+ ```ruby
+ require 'list_spider'
+
+ DOWNLOAD_DIR = 'coolshell/'.freeze
+
+ @next_list = []
+
+ def parse_index_item(e)
+   content = File.read(e.local_path)
+   doc = Nokogiri::HTML(content)
+   list_group = doc.css('h2.entry-title')
+   link_list = list_group.css('a')
+
+   link_list.each do |link|
+     href = link['href']
+     local_path = DOWNLOAD_DIR + link.content + '.html'
+     # or save them to a database for later use
+     @next_list << TaskStruct.new(href, local_path)
+   end
+ end
+
+ task_list = []
+ task_list << TaskStruct.new(
+   'https://coolshell.cn/',
+   DOWNLOAD_DIR + 'index.html',
+   parse_method: method(:parse_index_item)
+ )
+
+ ListSpider.get_list(task_list)
+ ListSpider.get_list(@next_list, max: 60)
+ ```
+
+ ## Or in one step
+ ```ruby
+ require 'list_spider'
+
+ DOWNLOAD_DIR = 'coolshell/'.freeze
+
+ def parse_index_item(e)
+   content = File.read(e.local_path)
+   doc = Nokogiri::HTML(content)
+   list_group = doc.css('h2.entry-title')
+   link_list = list_group.css('a')
+
+   link_list.each do |link|
+     href = link['href']
+     local_path = DOWNLOAD_DIR + link.content + '.html'
+     ListSpider.add_task(TaskStruct.new(href, local_path))
+   end
+ end
+
+ # get_one is a convenience wrapper for the single-TaskStruct case
+ ListSpider.get_one(
+   TaskStruct.new(
+     'https://coolshell.cn/',
+     DOWNLOAD_DIR + 'index.html',
+     parse_method: method(:parse_index_item)
+   ),
+   max: 60
+ )
+ ```
+
+ ## And there are many options you can use
+
+ ```ruby
+ def initialize(href, # request URL
+                local_path, # local path to save the data (also the deduplication key)
+                # HTTP method; one of :get, :head, :delete, :put, :post, :patch, :options
+                http_method: :get,
+                custom_data: nil, # user-defined data
+                parse_method: nil, # callback to parse the saved file; receives the TaskStruct itself
+                # callback invoked after a successful request; the file may not have been saved yet (e.g. 301, 404)
+                # receives the TaskStruct itself and the corresponding EventMachine::HttpRequest object
+                # http.response_header.status  status code
+                # http.response_header         response headers
+                # http.response                response body
+                callback: nil,
+                # callback invoked after a failed request
+                # receives the TaskStruct itself and the corresponding EventMachine::HttpRequest object
+                errback: nil,
+                stream_callback: nil, # streaming-data callback
+                convert_to_utf8: false, # convert the content to UTF-8?
+                overwrite_exist: false, # overwrite an existing file?
+                # request options
+                redirects: 3, # number of redirects to follow
+                keepalive: nil, # (connection reuse is not supported yet)
+                file: nil, # path of a file to upload
+                path: nil, # request path, useful for pipelined requests (not supported yet)
+                query: nil, # query string, String or Hash
+                body: nil, # request body, String or Hash
+                head: nil, # request headers
+                # connection options
+                connect_timeout: 60, # connection timeout
+                inactivity_timeout: nil, # inactivity timeout after connecting
+                # SSL settings
+                # ssl: {
+                #   :private_key_file => '/tmp/server.key',
+                #   :cert_chain_file => '/tmp/server.crt',
+                #   :verify_peer => false
+                # }
+                ssl: nil,
+                # bind: {
+                #   :host => '123.123.123.123', # use a specific interface for outbound request
+                #   :port => '123'
+                # }
+                bind: nil,
+                # proxy settings
+                # proxy: {
+                #   :host => '127.0.0.1', # proxy address
+                #   :port => 9000, # proxy port
+                #   :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required
+
+                #   :authorization => ['user', 'pass'] # proxy authorization header
+                # }
+                proxy: nil)
+ ```
+
+ ## Callback method signatures
+
+ ```ruby
+ # called when the file is saved successfully
+ def parse_eresponse(task_struct)
+   # ...
+ end
+
+ def call_back(task_struct, http_req)
+   # http_req is an EventMachine::HttpRequest object
+   # http_req.response_header.status
+   # ...
+ end
+
+ def err_back(task_struct, http_req)
+   # ...
+ end
+ ```
+
+ ### License
+
+ (MIT License) - Copyright (c) 2016 Charles Zhang
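The options above map one-to-one onto em-http-request's request and connection options. A small sketch exercising a few of them together (the user-agent string and local path are illustrative placeholders, not defaults of the gem):

```ruby
require 'list_spider'

def report(task_struct)
  puts "saved #{task_struct.local_path}"
end

task = TaskStruct.new(
  'https://coolshell.cn/',
  'coolshell/index.html',
  head: { 'user-agent' => 'list_spider-example' }, # request headers
  redirects: 5,           # follow up to five redirects
  convert_to_utf8: true,  # re-encode the saved body as UTF-8
  overwrite_exist: true,  # fetch even if the file already exists
  parse_method: method(:report)
)

ListSpider.get_one(task, max: 1)
```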
Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     list_spider (2.0.1)
+     list_spider (2.0.2)
        em-http-request (~> 1.1, >= 1.1.3)
        nokogiri (~> 1.6, >= 1.6.7)
        rchardet (~> 1.6, >= 1.6.1)
data/README.md CHANGED
@@ -1,186 +1,181 @@
- # list_spider
+ # About list_spider
 
- A url list spider based on em-http-request.
+ list_spider is a crawler tool based on [em-http-request](https://github.com/igrigorik/em-http-request).
 
- Many times we only need to spider by url list then parse them and spider again. This is for the purpose.
+ In many cases a crawler's job is to fetch links, parse the returned data, extract new links from it, and keep crawling; list_spider is a tool built for exactly that scenario.
 
- ## Features
- * Duplicate url filtering (based on local path, so you can custom your behavior).
+ ## Features
+ * Duplicate filtering (the local file path is used as the uniqueness key).
 
- * Convert to UTF-8 support.
+ * UTF-8 conversion support.
 
- * Increased spider support (don't spider exist).
+ * Incremental crawling by default; already-fetched pages are not crawled again (an option can force re-fetching).
 
- * Customize concurrent number and interval between task.
+ * Freely configurable maximum concurrency and interval between tasks.
 
- * Http options support.
+ * Supports all HTTP options.
 
- ## Getting started
+ ## Getting started
 
- gem install list_spider
+ ```ruby
+ gem install list_spider
+ ```
+
+ Or add it to your Gemfile
+
+ ```ruby
+ gem 'list_spider'
+ ```
 
- ## Use like this
+ ## Usage
  ```ruby
  require 'list_spider'
 
- DOWNLOAD_DIR = 'coolshell/'
+ DOWNLOAD_DIR = 'coolshell/'.freeze
 
- $next_list = []
+ @next_list = []
 
- def parse_index_item(file_name)
-   content = File.read(file_name)
+ def parse_index_item(e)
+   content = File.read(e.local_path)
    doc = Nokogiri::HTML(content)
-   list_group = doc.css("h2.entry-title")
-   link_list = list_group.css("a")
+   list_group = doc.css('h2.entry-title')
+   link_list = list_group.css('a')
 
    link_list.each do |link|
      href = link['href']
-     local_path = DOWNLOAD_DIR + link.content + ".html"
-     #or you can save them to database for later use
-     $next_list<< TaskStruct.new(href, local_path)
+     local_path = DOWNLOAD_DIR + link.content + '.html'
+     # these can be stored in a database for later processing
+     @next_list << TaskStruct.new(href, local_path)
    end
  end
 
  task_list = []
- task_list << TaskStruct.new('https://coolshell.cn/', DOWNLOAD_DIR + 'index.html', parse_method: method(:parse_index_item))
+ task_list << TaskStruct.new(
+   'https://coolshell.cn/',
+   DOWNLOAD_DIR + 'index.html',
+   parse_method: method(:parse_index_item)
+ )
 
  ListSpider.get_list(task_list)
- ListSpider.get_list($next_list, max: 60)
-
+ ListSpider.get_list(@next_list, max: 60)
  ```
 
- ## Or in one step
+ ## Or use the simpler one-step form
  ```ruby
  require 'list_spider'
 
- DOWNLOAD_DIR = 'coolshell/'
+ DOWNLOAD_DIR = 'coolshell/'.freeze
 
- def parse_index_item(file_name)
-
-   content = File.read(file_name)
+ def parse_index_item(e)
+   content = File.read(e.local_path)
    doc = Nokogiri::HTML(content)
-   list_group = doc.css("h2.entry-title")
-   link_list = list_group.css("a")
+   list_group = doc.css('h2.entry-title')
+   link_list = list_group.css('a')
 
    link_list.each do |link|
      href = link['href']
-     local_path = DOWNLOAD_DIR + link.content + ".html"
+     local_path = DOWNLOAD_DIR + link.content + '.html'
      ListSpider.add_task(TaskStruct.new(href, local_path))
    end
  end
 
- #get_one is a simple function for one taskstruct situation
- ListSpider.get_one(TaskStruct.new(
-   'https://coolshell.cn/',
-   DOWNLOAD_DIR + 'index.html',
-   parse_method: method(:parse_index_item)),
-   max: 60)
-
+ # get_one is a convenience wrapper around get_list for the single-task case
+ ListSpider.get_one(
+   TaskStruct.new(
+     'https://coolshell.cn/',
+     DOWNLOAD_DIR + 'index.html',
+     parse_method: method(:parse_index_item)
+   ),
+   max: 60
+ )
  ```
 
- ## You can define parse method in four forms
-
- ```ruby
- def parse_response(file_name)
-   #...
- end
-
-
- # custom_data is passed by TaskStruct's custom_data param
-
- def parse_response(file_name, custom_data)
-   #...
- end
-
-
- # response_header is a EventMachine::HttpResponseHeader object
- # you can use it like this:
- # response_header.status
- # response_header.cookie
- # response_header['Last-Modified']
-
- def parse_response(file_name, custom_data, response_header)
-   response_header.status
-   response_header['Last-Modified']
-
-   #...
- end
-
- # req is a EventMachine::HttpClientOptions object
- # you can use it like this:
- # req.body
- # req.headers
- # req.uri
- # req.host
- # req.port
- def parse_response(file_name, custom_data, response_header, req)
-   puts req.body
-   puts req.headers
-   puts req.uri
-   puts req.host
-   puts req.port
-
-   #...
- end
-
- ```
-
- ## And there are many options you can use
-
- ```ruby
- TaskStruct.new(href, local_path, http_method: :get, params: {}, custom_data: nil, parse_method: nil, header: nil)
+ ## get_list/get_one parameters
  ```
+ # down_list: the array of TaskStructs to request
+ # interval: interval between tasks, 0 by default. If a Range is given, the spider sleeps a random number of seconds drawn from that Range. RANDOM_TIME means a random 3-10 second interval.
+ # max: maximum concurrency, 50 by default. NO_LIMIT_CONCURRENT runs all request tasks concurrently at once.
 
- ```ruby
- #no concurrent limit (note: only use when list size is small)
- ListSpider.get_list(down_list, interval: 0, max: ListSpider::NO_LIMIT_CONCURRENT)
-
- #sleep random time, often used in site which limit spider
- ListSpider.get_list(down_list, interval: ListSpider::RANDOM_TIME, max: 1)
-
- #set random time range
- ListSpider.get_list(down_list, interval: (1..10), max: 1)
-
+ get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
+ get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
  ```
 
- ###Options below will take effect in the whole program (set them before call get_list)
+ ## TaskStruct options, essentially the same as in [em-http-request](https://github.com/igrigorik/em-http-request)
 
  ```ruby
- #set proxy
- ListSpider.set_proxy(proxy_addr, proxy_port, username: nil, password: nil)
-
- #set http header (if TaskStruct has header it will be used priority)
- ListSpider.set_header_option(header_option)
-
- #convert the file encoding to utf-8
- ListSpider.convert_to_utf8 = true
-
- #set connect timeout
- ListSpider.connect_timeout = 2*60
-
- #over write exist file
- ListSpider.overwrite_exist = false
-
- #set redirect depth
- ListSpider.max_redirects = 10
-
+ new(href, # request URL
+     local_path, # local path to save the data (also the deduplication key)
+     # HTTP method; one of :get, :head, :delete, :put, :post, :patch, :options
+     http_method: :get,
+     custom_data: nil, # user-defined data
+     parse_method: nil, # callback to parse the saved file; receives the TaskStruct itself
+     # callback invoked after a successful request; the file may not have been saved yet (e.g. 301, 404)
+     # receives the TaskStruct itself and the corresponding EventMachine::HttpRequest object
+     # http_req.response_header.status  status code
+     # http_req.response_header         response headers
+     # http_req.response                response body
+     callback: nil,
+     # callback invoked after a failed request
+     # receives the TaskStruct itself and the corresponding EventMachine::HttpRequest object
+     errback: nil,
+     stream_callback: nil, # streaming-data callback
+     convert_to_utf8: false, # convert the content to UTF-8?
+     overwrite_exist: false, # overwrite an existing file?
+     # request settings
+     redirects: 3, # number of redirects to follow
+     keepalive: nil, # (connection reuse is not supported yet)
+     file: nil, # path of a file to upload
+     path: nil, # request path, useful for pipelined requests (not supported yet)
+     query: nil, # query string, String or Hash
+     body: nil, # request body, String or Hash
+     head: nil, # request headers
+     # connection settings
+     connect_timeout: 60, # connection timeout
+     inactivity_timeout: nil, # inactivity timeout after connecting
+     # SSL settings
+     # ssl: {
+     #   :private_key_file => '/tmp/server.key',
+     #   :cert_chain_file => '/tmp/server.crt',
+     #   :verify_peer => false
+     # }
+     ssl: nil,
+     # bind: {
+     #   :host => '123.123.123.123', # use a specific interface for outbound request
+     #   :port => '123'
+     # }
+     bind: nil,
+     # proxy settings
+     # proxy: {
+     #   :host => '127.0.0.1', # proxy address
+     #   :port => 9000, # proxy port
+     #   :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required
+
+     #   :authorization => ['user', 'pass'] # proxy authorization header
+     # }
+     proxy: nil)
  ```
 
- ## There is a util class to help check or delete unvalid file
+ ## Callback signatures
 
  ```ruby
- FileFilter.delete(CustomConfig::DIR + '*', size_threshold: 300)
-
- FileFilter.check(CustomConfig::DIR + '*', size_threshold: 300)
-
- FileFilter.check_save_result(CustomConfig::DIR + '*', size_threshold: 300)
+ # called after the file is saved successfully; passed via the parse_method parameter
+ def parse_eresponse(task_struct)
+   # ...
+ end
 
- #params
- FileFilter.delete(dir_pattern, size_threshold: 1000, cust_judge: nil)
+ # called after the HTTP request succeeds; passed via the callback parameter
+ def call_back(task_struct, http_req)
+   # http_req is an EventMachine::HttpRequest object
+   # http_req.response_header.status
+   # ...
+ end
 
- FileFilter.check_save_result(dir_pattern, save_file_name: 'filtered_file.txt', size_threshold: 1000, cust_judge: nil)
+ # called after the HTTP request fails; passed via the errback parameter
+ def err_back(task_struct, http_req)
+   # ...
+ end
  ```
 
- ### License
+ ## License
 
  (MIT License) - Copyright (c) 2016 Charles Zhang
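The removed English examples above already show how `interval` and `max` behave; restated compactly as a sketch, with `down_list` standing in for any array of TaskStructs:

```ruby
require 'list_spider'

down_list = [
  TaskStruct.new('https://coolshell.cn/page/1', 'coolshell/1.html'),
  TaskStruct.new('https://coolshell.cn/page/2', 'coolshell/2.html')
]

# No concurrency limit: every task starts at once (only for small lists).
ListSpider.get_list(down_list, max: ListSpider::NO_LIMIT_CONCURRENT)

# One task at a time, sleeping a random 3-10 seconds between tasks.
ListSpider.get_list(down_list, interval: ListSpider::RANDOM_TIME, max: 1)

# One task at a time with a random interval drawn from 1-10 seconds.
ListSpider.get_list(down_list, interval: (1..10), max: 1)
```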
lib/list_spider.rb CHANGED
@@ -4,10 +4,16 @@ require 'nokogiri'
  require 'fileutils'
  require 'set'
  require 'addressable/uri'
- require File.expand_path('../spider_helper', __FILE__)
- require File.expand_path('../file_filter', __FILE__)
+ require File.expand_path('spider_helper', __dir__)
+ require File.expand_path('file_filter', __dir__)
 
+ # Crawl task class
  class TaskStruct
+   # * href: request URL
+   # * local_path: local path to save the data (also the deduplication key)
+   # * http_method: HTTP method; one of :get, :head, :delete, :put, :post, :patch, :options
+   # * custom_data: user-defined data
+   # * parse_method: callback to parse the saved file; receives the TaskStruct itself
    def initialize(href, # request URL
                   local_path, # local path to save the data (also the deduplication key)
                   # HTTP method; one of :get, :head, :delete, :put, :post, :patch, :options
@@ -16,9 +22,9 @@ class TaskStruct
                   parse_method: nil, # callback to parse the saved file; receives the TaskStruct itself
                   # callback invoked after a successful request; the file may not have been saved yet (e.g. 301, 404)
                   # receives the TaskStruct itself and the corresponding EventMachine::HttpRequest object
-                  # http.response_header.status  status code
-                  # http.response_header         response headers
-                  # http.response                response body
+                  # http_req.response_header.status  status code
+                  # http_req.response_header         response headers
+                  # http_req.response                response body
                   callback: nil,
                   # callback invoked after a failed request
                   # receives the TaskStruct itself and the corresponding EventMachine::HttpRequest object
@@ -26,7 +32,7 @@ class TaskStruct
                   stream_callback: nil, # streaming-data callback
                   convert_to_utf8: false, # convert the content to UTF-8?
                   overwrite_exist: false, # overwrite an existing file?
-                  # request options
+                  # request settings
                   redirects: 3, # number of redirects to follow
                   keepalive: nil, # (connection reuse is not supported yet)
                   file: nil, # path of a file to upload
@@ -34,7 +40,7 @@ class TaskStruct
                   query: nil, # query string, String or Hash
                   body: nil, # request body, String or Hash
                   head: nil, # request headers
-                  # connection options
+                  # connection settings
                   connect_timeout: 60, # connection timeout
                   inactivity_timeout: nil, # inactivity timeout after connecting
                   # SSL settings
@@ -112,6 +118,41 @@ module ListSpider
    @local_path_set = Set.new
 
    class << self
+     def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
+       if interval.is_a? Range
+         @random_time_range = interval
+         interval = RANDOM_TIME
+       end
+
+       @down_list = filter_list(down_list)
+       @interval = interval
+       @max = max
+       @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
+       @succeed_size = 0
+       @failed_size = 0
+
+       puts "total size:#{@down_list.size}"
+       event_machine_start_list(next_task, method(:complete))
+     end
+
+     def get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
+       get_list([task], interval: interval, max: max)
+     end
+
+     def add_task(task)
+       if task.is_a? Array
+         need_down_list = filter_list(task)
+         @down_list += need_down_list
+       elsif task.is_a?TaskStruct
+         need_down_list = filter_list([task])
+         @down_list += need_down_list
+       else
+         puts "error task type:#{task.class}"
+       end
+     end
+
+     private
+
      def event_machine_down(link_struct_list, callback = nil)
        failed_list = []
        succeed_list = []
@@ -247,43 +288,6 @@ module ListSpider
        end
        need_down_list
      end
-
-     def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
-       if interval.is_a? Range
-         @random_time_range = interval
-         interval = RANDOM_TIME
-       end
-
-       @down_list = []
-
-       need_down_list = filter_list(down_list)
-
-       @down_list += need_down_list
-       @interval = interval
-       @max = max
-       @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
-       @succeed_size = 0
-       @failed_size = 0
-
-       puts "total size:#{@down_list.size}"
-       event_machine_start_list(next_task, method(:complete))
-     end
-
-     def get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
-       get_list([task], interval: interval, max: max)
-     end
-
-     def add_task(task)
-       if task.is_a? Array
-         need_down_list = filter_list(task)
-         @down_list += need_down_list
-       elsif task.is_a?TaskStruct
-         need_down_list = filter_list([task])
-         @down_list += need_down_list
-       else
-         puts "error task type:#{task.class}"
-       end
-     end
    end
 
  Signal.trap('INT') do
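The move above places `get_list`, `get_one`, and `add_task` ahead of the new `private` marker, making the public entry points explicit. Per the README, `add_task` is meant to queue follow-up work from inside a parse callback; a condensed sketch:

```ruby
require 'list_spider'

def parse_index(task)
  doc = Nokogiri::HTML(File.read(task.local_path))
  doc.css('h2.entry-title a').each do |link|
    # add_task accepts a single TaskStruct or an array of them.
    ListSpider.add_task(TaskStruct.new(link['href'], "coolshell/#{link.content}.html"))
  end
end

ListSpider.get_one(
  TaskStruct.new('https://coolshell.cn/', 'coolshell/index.html',
                 parse_method: method(:parse_index)),
  max: 30
)
```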
lib/list_spider/version.rb CHANGED
@@ -1,3 +1,3 @@
  module ListSpider
-   VERSION = '2.0.2'.freeze
+   VERSION = '2.1.0'.freeze
  end
list_spider.gemspec CHANGED
@@ -1,5 +1,5 @@
 
- lib = File.expand_path('../lib', __FILE__)
+ lib = File.expand_path('lib', __dir__)
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
  require 'list_spider/version'
 
@@ -26,6 +26,6 @@ Gem::Specification.new do |spec|
    spec.add_development_dependency 'rake', '~> 10.0'
 
    spec.add_dependency 'em-http-request', '~> 1.1', '>= 1.1.3'
-   spec.add_dependency 'nokogiri', '~> 1.6', '>= 1.6.7'
+   spec.add_dependency 'nokogiri', '>= 1.8.5'
    spec.add_dependency 'rchardet', '~> 1.6', '>= 1.6.1'
  end
@@ -1,5 +1,5 @@
- # require 'list_spider'
- require File.expand_path('../lib/list_spider', __FILE__)
+ require 'list_spider'
+ # require File.expand_path('../lib/list_spider', __FILE__)
 
  DOWNLOAD_DIR = 'coolshell/'.freeze
 
@@ -16,8 +16,6 @@ def parse_index_item(e)
    end
  end
 
- # ListSpider.convert_to_utf8 = true
-
  # get_one is a simple function for one taskstruct situation
  ListSpider.get_one(
    TaskStruct.new(
@@ -4,8 +4,8 @@ DOWNLOAD_DIR = 'coolshell/'.freeze
 
  @next_list = []
 
- def parse_index_item(file_name)
-   content = File.read(file_name)
+ def parse_index_item(e)
+   content = File.read(e.local_path)
    doc = Nokogiri::HTML(content)
    list_group = doc.css('h2.entry-title')
    link_list = list_group.css('a')
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: list_spider
  version: !ruby/object:Gem::Version
-   version: 2.0.2
+   version: 2.1.0
  platform: ruby
  authors:
  - Charles Zhang
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-02-28 00:00:00.000000000 Z
+ date: 2019-06-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler
@@ -62,22 +62,16 @@ dependencies:
    name: nokogiri
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - "~>"
-       - !ruby/object:Gem::Version
-         version: '1.6'
      - - ">="
        - !ruby/object:Gem::Version
-         version: 1.6.7
+         version: 1.8.5
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - "~>"
-       - !ruby/object:Gem::Version
-         version: '1.6'
      - - ">="
        - !ruby/object:Gem::Version
-         version: 1.6.7
+         version: 1.8.5
  - !ruby/object:Gem::Dependency
    name: rchardet
    requirement: !ruby/object:Gem::Requirement
@@ -106,7 +100,9 @@ extensions: []
  extra_rdoc_files: []
  files:
  - ".gitignore"
+ - ".rdoc_options"
  - ".rubocop.yml"
+ - English_README.md
  - Gemfile
  - Gemfile.lock
  - README.md
@@ -140,8 +136,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: '0'
  requirements: []
- rubyforge_project:
- rubygems_version: 2.7.3
+ rubygems_version: 3.0.1
  signing_key:
  specification_version: 4
  summary: List Spider