loyal_spider 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README.md +4 -0
- data/Rakefile +21 -0
- data/lib/loyal_spider.rb +13 -0
- data/lib/loyal_spider/ables.rb +7 -0
- data/lib/loyal_spider/ables/entity_able.rb +86 -0
- data/lib/loyal_spider/ables/entity_lister_able.rb +119 -0
- data/lib/loyal_spider/ables/fetch_able.rb +130 -0
- data/lib/loyal_spider/ables/fetch_options.rb +38 -0
- data/lib/loyal_spider/ables/fetch_result.rb +34 -0
- data/lib/loyal_spider/clients.rb +13 -0
- data/lib/loyal_spider/clients/haha365/article_entity.rb +11 -0
- data/lib/loyal_spider/clients/haha365/article_entity_lister.rb +94 -0
- data/lib/loyal_spider/clients/kuaile_mahua/article_entity.rb +49 -0
- data/lib/loyal_spider/clients/kuaile_mahua/article_entity_lister.rb +85 -0
- data/lib/loyal_spider/clients/lengxiaohua/article_entity.rb +20 -0
- data/lib/loyal_spider/clients/lengxiaohua/article_entity_lister.rb +107 -0
- data/lib/loyal_spider/clients/xiaohuadi/article_entity.rb +66 -0
- data/lib/loyal_spider/clients/xiaohuadi/article_entity_lister.rb +96 -0
- data/lib/loyal_spider/config.rb +20 -0
- data/lib/loyal_spider/image.rb +1 -0
- data/lib/loyal_spider/utils.rb +4 -0
- data/lib/loyal_spider/utils/array_util.rb +16 -0
- data/lib/loyal_spider/utils/hash_util.rb +29 -0
- data/lib/loyal_spider/version.rb +4 -0
- data/lib/tasks/loyal_spider_tasks.rake +4 -0
- metadata +173 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2013 YOURNAME
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Load Bundler so the gem's declared dependencies are on the load path.
# A missing Bundler is reported but does not abort loading the Rakefile.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` -- generates the API documentation into ./rdoc.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'LoyalSpider'
  rdoc.options << '--line-numbers'
  # The package ships README.md; the previously referenced README.rdoc
  # is not part of the gem, so the README was silently left out.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Registers Bundler's gem packaging tasks.
Bundler::GemHelper.install_tasks
|
21
|
+
|
data/lib/loyal_spider.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require 'rest-client'
|
3
|
+
require 'sanitize'
|
4
|
+
require "rails_config"
|
5
|
+
require "nokogiri"
|
6
|
+
|
7
|
+
require "loyal_spider/config"
|
8
|
+
require "loyal_spider/ables"
|
9
|
+
require "loyal_spider/clients"
|
10
|
+
require "loyal_spider/utils"
|
11
|
+
|
12
|
+
module LoyalSpider
|
13
|
+
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require "#{File.dirname(__FILE__)}/ables/fetch_able"
|
3
|
+
require "#{File.dirname(__FILE__)}/ables/entity_able"
|
4
|
+
require "#{File.dirname(__FILE__)}/ables/entity_lister_able"
|
5
|
+
require "#{File.dirname(__FILE__)}/ables/fetch_options"
|
6
|
+
require "#{File.dirname(__FILE__)}/ables/fetch_result"
|
7
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Mixin describing one scraped entity (an article/joke): its attributes,
  # a few derived views (tag names, author names, embedded images) and a
  # minimal validation that requires both +content+ and +url+.
  module EntityAble
    # Installs the attribute accessors and instance methods on the includer.
    def self.included(base)
      base.class_eval do
        attr_accessor :title          # title
        attr_accessor :url            # URL of the entity
        attr_accessor :content        # body (HTML)
        # Only the writer is generated for tags/authors: an accessor defined
        # on the class itself would take precedence over the defaulting
        # readers in InstanceMethods, making them return nil.
        attr_writer   :tags           # tags, array of {:text, :href}
        attr_accessor :up_rating      # number of up-votes
        attr_accessor :down_rating    # number of down-votes
        attr_accessor :comments_count # number of comments
        attr_writer   :authors        # scraped authors, array of {:text, :href}
        attr_accessor :publish_time   # publication time
        attr_accessor :errors         # validation errors, {field => [messages]}

        include InstanceMethods
      end
    end

    module InstanceMethods
      # attrs - Hash of attribute name => value; every key must have a
      #         matching writer (NoMethodError otherwise).
      def initialize(attrs = {})
        attrs.each do |key, value|
          send(:"#{key}=", value)
        end

        self.errors = {}
      end

      # Tags as hashes; defaults to an empty array.
      def tags
        @tags ||= []
      end

      # The :text of every tag (memoized on first call).
      def tags_array
        @tags_array ||= tags.map { |tag| tag[:text] }
      end

      # Images found in +content+ as {:src, :title, :alt} hashes (memoized).
      def images
        # to_s: a missing content is parsed as an empty document.
        @images ||= Nokogiri::HTML.parse(content.to_s).css('img').map do |img_doc|
          {
            :src   => img_doc.attr('src'),
            :title => img_doc.attr('title'),
            :alt   => img_doc.attr('alt')
          }
        end
      end

      def images?
        images.any?
      end

      # Authors as hashes; defaults to an empty array.
      def authors
        @authors ||= []
      end

      # The :text of every author (memoized on first call).
      def authors_array
        @authors_array ||= authors.map { |author| author[:text] }
      end

      # Runs the validation and reports whether no error was recorded.
      def valid?
        valid!
        errors.empty?
      end

      # Re-computes +errors+ from scratch, so repeated calls neither
      # duplicate messages nor keep failures that have since been fixed.
      def valid!
        self.errors = {}

        add_error :content, '不能为空' unless content.to_s.strip.size > 0
        add_error :url, '不能为空' unless url.to_s.strip.size > 0
      end

      private

      # Appends +message+ to the list of errors recorded for +field+.
      def add_error(field, message)
        (errors[field] ||= []) << message
      end
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
## -*- encoding : utf-8 -*-
|
3
|
+
module LoyalSpider
  # Mixin for "lister" spiders: classes that fetch a paged index URL and
  # collect the entities found on that page. It builds on FetchAble and
  # contributes the paging / URL-formatting logic plus the entity list.
  module EntityListerAble
    def self.included(base)
      base.class_eval do
        attr_writer :current_page # reader (defaulting to 1) is in InstanceMethods

        include ::LoyalSpider::FetchAble
        include InstanceMethods
        extend ClassMethods
      end
    end

    module ClassMethods
      # Stores the lister configuration. Recognised options:
      # - :url_format          format string for pages >= 2
      # - :url_format_first    format string for the first page
      # - :url_format_options  default values for the format strings
      # - :entity_clazz        class instantiated by #new_entity
      # - :fetch_options       forwarded to config_loyal_spider_default_fetch_options
      #
      # Only the first configuration is kept (||=). The options hash is
      # copied so that removing :fetch_options does not alter the caller's hash.
      def config_loyal_spider_entity_lister(options = {})
        @entity_lister_options ||= options.dup

        config_loyal_spider_default_fetch_options(
          entity_lister_options.delete(:fetch_options) || {}
        )
      end

      def entity_lister_options
        @entity_lister_options ||= {}
      end

      # Fetches +page+ with a fresh lister instance.
      # (The file used to define this method twice; this is the definition
      # that was actually in effect.)
      def paged_fetch(page, options = {}, &block)
        new.paged_fetch(page, options, &block)
      end
    end

    module InstanceMethods
      # Fetches +page+. options[:url_format_options] is merged into this
      # instance's format values; the remaining options go to #fetch.
      def paged_fetch(page, options = {}, &block)
        url_format_options.merge!(options[:url_format_options] || {})

        self.current_page = page
        # Page and/or format values changed: drop the memoized URL.
        @fetch_url = nil
        fetch(options, &block)
      end

      def url_format
        @url_format ||= self.class.entity_lister_options[:url_format]
      end

      def url_format_first
        @url_format_first ||= self.class.entity_lister_options[:url_format_first]
      end

      # Per-instance copy of the configured format values. The copy keeps
      # the merge! in #paged_fetch from modifying the class-level hash.
      def url_format_options
        @url_format_options ||= (self.class.entity_lister_options[:url_format_options] || {}).dup
      end

      # FetchAble hook: start every fetch with an empty entity list.
      def _before_fetch(options = {})
        @entities = []
      end

      # FetchAble hook: expose the collected entities on the result.
      def _after_fetch_success(result)
        result.entities = entities
      end

      def entities
        @entities ||= []
      end

      # TODO
      def entity_clazz
        self.class.entity_lister_options[:entity_clazz]
      end

      # Builds an entity of the configured class; nil when none is configured.
      def new_entity(attrs = {})
        entity_clazz.new(attrs) if entity_clazz
      end

      def add_entity(entity)
        entities << entity
      end

      def current_page
        @current_page ||= 1
      end

      def first_page?
        current_page < 2
      end

      # URL for the current page (memoized until the next #paged_fetch).
      # The first page uses :url_format_first when configured and falls
      # back to :url_format otherwise.
      def fetch_url
        @fetch_url ||= begin
          format_string = (first_page? && url_format_first) || url_format

          sprintf(format_string, url_format_options.merge(:page => current_page))
        end
      end
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Mixin that performs an HTTP request through RestClient and wraps the
  # outcome in a LoyalSpider::FetchResult. The including class supplies the
  # URL through #fetch_url (preferred) or #url and may define these hooks:
  #   _before_fetch / before_fetch(options)
  #   after_fetch_success / _after_fetch_success(result)
  #   after_fetch_fail / _after_fetch_fail(result)
  module FetchAble
    def self.included(base)
      base.class_eval do
        attr_accessor :fetch_options

        include InstanceMethods
        extend ClassMethods
      end
    end

    module ClassMethods
      # Sets the class-wide default fetch options (first call wins).
      def config_loyal_spider_default_fetch_options(options = {})
        @default_fetch_options ||= options
      end

      def default_fetch_options
        @default_fetch_options ||= {}
      end

      # Convenience: fetch with a fresh instance.
      def fetch(options = {}, &block)
        new.fetch(options, &block)
      end
    end

    module InstanceMethods
      # Base URL of the site. Before the first fetch (when fetch_options is
      # still nil) it falls back to the class-level default instead of
      # raising NoMethodError.
      def base_url
        if fetch_options
          fetch_options.base_url
        else
          self.class.default_fetch_options[:base_url] || ''
        end
      end

      # Runs the hooks around one request and returns the FetchResult.
      # The optional block receives the FetchResult as well.
      def fetch(options = {}, &block)
        _run_fetch_hook :_before_fetch, options
        _run_fetch_hook :before_fetch, options

        result = _perform_fetch(options, &block)

        if result.success?
          _run_fetch_hook :after_fetch_success, result
          _run_fetch_hook :_after_fetch_success, result
        else
          _run_fetch_hook :after_fetch_fail, result
          _run_fetch_hook :_after_fetch_fail, result
        end

        result
      end

      private

      # Calls the hook +name+ when the object defines it. Hooks are looked
      # up including private methods, so they are invoked through #send.
      def _run_fetch_hook(name, arg)
        send(name, arg) if respond_to?(name, true)
      end

      # Maps an HTTP status code onto the coarse status symbols used by
      # FetchResult.
      def _response_status_for(response_code)
        case response_code
        when 200..207 then :success
        when 300..307 then :redirect
        when 400..450 then :request_error
        else :server_error
        end
      end

      # Executes the request. Errors raised while doing so are converted
      # into a failed FetchResult (502 for SocketError, 400 otherwise)
      # rather than propagated.
      def _perform_fetch(options = {}, &block)
        _fetch_url = respond_to?(:fetch_url, true) ? fetch_url : url

        @fetch_options = ::LoyalSpider::FetchOptions.new(
          ::LoyalSpider::HashUtil.deep_merge(self.class.default_fetch_options, options).merge(
            :url => _fetch_url
          )
        )

        begin
          ::RestClient::Request.execute(@fetch_options.net_options) do |response, request, response_result, &_block|
            response_code = response.code

            _result = ::LoyalSpider::FetchResult.new(
              :response_status => _response_status_for(response_code),
              :response_code   => response_code,
              # Re-tag the body with the site's declared encoding, then
              # convert it to UTF-8.
              :response        => response.force_encoding(fetch_options.encoding_type).encode!('UTF-8'),
              :request         => request,
              :response_result => response_result,
              :fetch_options   => fetch_options,
              :url             => _fetch_url
            )

            block.call(_result, &_block) if block

            _result
          end
        # StandardError instead of Exception: interrupts and exit requests
        # must not be turned into a "request error" result.
        rescue StandardError => exception
          _error_result = {
            :response        => nil,
            :request         => nil,
            :response_result => nil,
            :fetch_options   => fetch_options,
            :url             => _fetch_url,
            :exception       => exception
          }

          case exception
          when ::SocketError
            _error_result.merge!(:response_status => :socket_error, :response_code => 502)
          else
            _error_result.merge!(:response_status => :request_error, :response_code => 400)
          end

          _result = ::LoyalSpider::FetchResult.new(_error_result)

          # RestClient's continuation block only exists inside the execute
          # block above; referencing it here raised NameError.
          block.call(_result) if block

          _result
        end
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Value object holding the settings of one HTTP fetch.
  class FetchOptions
    attr_accessor :method        # HTTP method
    attr_accessor :open_timeout  # connection-open timeout, in seconds
    attr_accessor :timeout       # request timeout, in seconds
    attr_accessor :encoding_type # encoding of the fetched page
    attr_accessor :headers       # request headers
    attr_accessor :url           # URL to fetch
    attr_accessor :base_url      # base URL of the site

    # attrs - Hash with any of :url, :base_url, :method, :timeout,
    #         :open_timeout, :encoding_type, :headers. Missing keys get the
    #         defaults below; :headers is merged over the default headers.
    def initialize(attrs = {})
      @url           = attrs[:url] || ''
      @base_url      = attrs[:base_url] || ''
      @method        = attrs[:method] || :get
      @timeout       = attrs[:timeout] || 60 # seconds
      # :open_timeout matches the attribute name; the misspelled :open_time
      # key that used to be the only one read is still honoured.
      @open_timeout  = attrs[:open_timeout] || attrs[:open_time] || @timeout
      @encoding_type = attrs[:encoding_type] || 'UTF-8'
      @headers = {
        :accept_charset  => 'UTF-8,*;q=0.5',
        :accept_encoding => 'gzip,deflate,sdch',
        :user_agent      => 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17'
      }.merge(attrs[:headers] || {})
    end

    # The options hash handed to RestClient::Request.execute.
    def net_options
      {
        :url           => url,
        :method        => self.method,
        :timeout       => timeout,
        :open_timeout  => open_timeout,
        :encoding_type => encoding_type,
        :headers       => headers
      }
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Outcome of one fetch: status, raw response and, for listers, the
  # entities extracted from it.
  class FetchResult
    attr_accessor :response_status, :response_code, :response, :request,
                  :response_result, :fetch_options, :url, :exception,
                  :entities

    # Assigns every key of +attrs+ through the writer of the same name.
    def initialize(attrs = {})
      attrs.each { |name, value| send(:"#{name}=", value) }
    end

    # True only when the response status is :success.
    def success?
      response_status == :success
    end

    def fail?
      !success?
    end

    # The response parsed as an HTML document (memoized); nil when there
    # is no response.
    def response_html_doc
      return nil unless response

      @response_html_doc ||= Nokogiri::HTML.parse(response)
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require "#{File.dirname(__FILE__)}/clients/kuaile_mahua/article_entity"
|
3
|
+
require "#{File.dirname(__FILE__)}/clients/kuaile_mahua/article_entity_lister"
|
4
|
+
|
5
|
+
require "#{File.dirname(__FILE__)}/clients/haha365/article_entity"
|
6
|
+
require "#{File.dirname(__FILE__)}/clients/haha365/article_entity_lister"
|
7
|
+
|
8
|
+
require "#{File.dirname(__FILE__)}/clients/lengxiaohua/article_entity"
|
9
|
+
require "#{File.dirname(__FILE__)}/clients/lengxiaohua/article_entity_lister"
|
10
|
+
|
11
|
+
require "#{File.dirname(__FILE__)}/clients/xiaohuadi/article_entity"
|
12
|
+
require "#{File.dirname(__FILE__)}/clients/xiaohuadi/article_entity_lister"
|
13
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module Haha365
      # Lists the articles shown on a haha365.com category index page.
      # The category is supplied through url_format_options[:category].
      class ArticleEntityLister
        include ::LoyalSpider::EntityListerAble

        self.config_loyal_spider_entity_lister :url_format_first => 'http://www.haha365.com/%{category}/',
          :url_format    => 'http://www.haha365.com/%{category}/index_%{page}.htm',
          :entity_clazz  => ::LoyalSpider::Clients::Haha365::ArticleEntity,
          :fetch_options => {
            :encoding_type => 'GBK',
            :base_url      => 'http://www.haha365.com'
          }

        # TODO:
        def before_fetch options={}
          puts "before_fetch: #{options}"
        end

        # Builds one entity per article of the fetched page and keeps the
        # valid ones. Title, body and category are matched by position.
        # TODO
        def after_fetch_success result
          html_doc = result.response_html_doc

          _basic_doc = html_doc.css('html #main .content .left .r_c')

          # Query once instead of on every iteration.
          _title_docs    = _basic_doc.css('.cat_llb h3 a')
          _content_docs  = _basic_doc.css('.cat_llb #endtext')
          _category_docs = _basic_doc.css('.cat_llb .fl a')

          _category_docs.each_with_index do |_category_doc, _index|
            _title_doc   = _title_docs[_index]
            _content_doc = _content_docs[_index]

            # Skip rows for which one of the three parts is missing.
            next if _title_doc.nil? || _content_doc.nil?

            # Plain call: the node is known to be present here, so the
            # ActiveSupport-only `try` is not needed.
            _text_content = _content_doc.inner_html

            # Every "<br>\r\n"-separated chunk becomes a sanitized paragraph.
            _content = _text_content.to_s.split("<br>\r\n").map do |_cnt|
              "<p>#{(Sanitize.clean _cnt).to_s.strip}</p>"
            end.join('')

            _entity = self.new_entity(
              :content        => _content,
              :url            => "#{self.base_url}#{_title_doc.attr('href')}",
              :title          => "#{_title_doc.text}",
              :tags           => [
                {
                  :text => _category_doc.text,
                  :href => "#{self.base_url}#{_category_doc.attr('href')}"
                }
              ],
              :authors        => [],
              # -1 is the value this client stores for the three counters.
              :up_rating      => -1,
              :down_rating    => -1,
              :comments_count => -1
            )

            self.add_entity _entity if _entity.valid?
          end
        end

        def after_fetch_fail result
          puts "after_fetch fail: #{result}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module KuaileMahua
      # A single article of kl688.com. Fetching the entity (its +url+)
      # re-reads all attributes from the article page.
      class ArticleEntity
        include ::LoyalSpider::EntityAble
        include ::LoyalSpider::FetchAble

        self.config_loyal_spider_default_fetch_options(
          :encoding_type => 'GBK',
          :base_url      => 'http://www.kl688.com'
        )

        # Fills the attributes from the fetched article page.
        # TODO
        def after_fetch_success result
          html_doc   = result.response_html_doc
          entity_doc = html_doc.css('.main .main-left .xiaohua .xiaohua-data')

          # A missing node yields an empty title / zero count instead of a
          # NoMethodError on nil.
          _title_doc    = entity_doc.css('h1').first
          _comments_doc = entity_doc.css('.tools li s').first

          self.title   = _title_doc ? _title_doc.text.to_s.strip : ''
          self.content = entity_doc.css('.content').inner_html

          self.tags = entity_doc.css('.link .tags h4 a').map do |_tag_doc|
            {
              :text => _tag_doc.text.to_s.strip,
              :href => "#{self.base_url}#{_tag_doc.attr('href').to_s.strip}"
            }
          end

          # No `tags_text=` assignment here: EntityAble defines no such
          # attribute (the call raised NoMethodError); the tag names are
          # available through #tags_array.

          self.authors = entity_doc.css('.link .tags .pusher a').map do |_author_doc|
            {
              :text => _author_doc.text.to_s.strip,
              :href => "#{_author_doc.attr('href').to_s.strip}"
            }
          end

          self.up_rating      = entity_doc.css('.tools li a.good').text.to_i
          self.down_rating    = entity_doc.css('.tools li a.bad').text.to_i
          self.comments_count = _comments_doc ? _comments_doc.text.to_i : 0
        end
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module KuaileMahua
      # Lists the articles shown on a kl688.com index page.
      class ArticleEntityLister
        include ::LoyalSpider::EntityListerAble

        self.config_loyal_spider_entity_lister :url_format_first => 'http://www.kl688.com/',
          :url_format    => 'http://www.kl688.com/newjokes/index_%{page}.htm',
          :entity_clazz  => ::LoyalSpider::Clients::KuaileMahua::ArticleEntity,
          :fetch_options => {
            :encoding_type => 'GBK',
            :base_url      => 'http://www.kl688.com'
          }

        # TODO:
        def before_fetch options={}
          puts "before_fetch: #{options}"
        end

        # Builds one entity per '.xiaohua' node of the fetched page and
        # keeps the valid ones.
        # TODO
        def after_fetch_success result
          html_doc = result.response_html_doc

          # Same for every entry, so computed once.
          _base_url = result.fetch_options.base_url.to_s.strip

          html_doc.css('.main .main-left .xiaohua').each do |entity_doc|
            _title_link = entity_doc.css('h3 a').first

            # Without a title link there is no URL; skip the node instead
            # of failing on nil.
            next if _title_link.nil?

            _comments_doc = entity_doc.css('.tools li s').first

            _entity_attr = {
              :url     => "#{_base_url}#{_title_link.attr('href').to_s.strip}",
              :title   => _title_link.text,
              :content => entity_doc.css('.content').inner_html
            }

            _entity_attr[:tags] = entity_doc.css('.link .tags h4 a').map do |_tag_doc|
              {
                :text => _tag_doc.text.to_s.strip,
                :href => "#{_base_url}#{_tag_doc.attr('href').to_s.strip}"
              }
            end

            _entity_attr[:authors] = entity_doc.css('.link .tags .pusher a').map do |_author_doc|
              {
                :text => _author_doc.text.to_s.strip,
                :href => "#{_author_doc.attr('href').to_s.strip}"
              }
            end

            _entity_attr[:up_rating]      = entity_doc.css('.tools li a.good').text.to_i
            _entity_attr[:down_rating]    = entity_doc.css('.tools li a.bad').text.to_i
            _entity_attr[:comments_count] = _comments_doc ? _comments_doc.text.to_i : 0

            _entity = self.new_entity(_entity_attr)

            # A "more" link is present: fetch the entity itself so that it
            # re-reads its attributes from the article page.
            _entity.fetch if entity_doc.css('.content .more a').any?

            self.add_entity _entity if _entity.valid?
          end
        end

        def after_fetch_fail result
          puts "after_fetch fail: #{result}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module Lengxiaohua
      # Entity for one lengxiaohua.com joke. It is a plain EntityAble; the
      # fetch support below is commented out.
      class ArticleEntity
        include ::LoyalSpider::EntityAble

        # include ::LoyalSpider::FetchAble
        #
        # self.config_loyal_spider_default_fetch_options(
        #   :encoding_type => 'UTF-8',
        #   :base_url => 'http://lengxiaohua.com'
        # )

        # Delegates unchanged to the inherited validation.
        def valid?
          super()
        end
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module Lengxiaohua
      # Lists the jokes shown on a lengxiaohua.com index page.
      class ArticleEntityLister
        include ::LoyalSpider::EntityListerAble

        self.config_loyal_spider_entity_lister :url_format_first => 'http://lengxiaohua.com/',
          :url_format    => 'http://lengxiaohua.com/?page_num=%{page}',
          :entity_clazz  => ::LoyalSpider::Clients::Lengxiaohua::ArticleEntity,
          :fetch_options => {
            :encoding_type => 'UTF-8',
            :base_url      => 'http://lengxiaohua.com'
          }

        # TODO:
        def before_fetch options={}
          puts "before_fetch: #{options}"
        end

        # Builds one entity per 'li.joke_li' node of the fetched page and
        # keeps the valid ones.
        # TODO
        def after_fetch_success result
          html_doc = result.response_html_doc

          # Same for every entry, so computed once.
          _base_url = result.fetch_options.base_url.to_s.strip

          html_doc.css('.joke_wrap li.joke_li').each do |entity_doc|
            _id_doc   = entity_doc.css('.para_info .para_tool a').first
            _text_doc = entity_doc.css('.para_can pre').first

            # The joke id (needed for the URL) and the text block are
            # required; skip the node rather than fail on nil.
            next if _id_doc.nil? || _text_doc.nil?

            _joke_id = _id_doc.attr('jokeid')

            _entity_attr = {
              :url   => "#{_base_url}/joke/#{_joke_id}",
              :title => ''
            }

            # Every line of the sanitized text becomes a paragraph.
            _content = Sanitize.clean(_text_doc.inner_html).split(/\n/).map do |_cnt|
              "<p>#{_cnt}</p>"
            end.join('')

            if _img_box = entity_doc.css('.default_load_imgbox').first
              # The image URL is read from 'data-original'; the '!water'
              # marker is removed from it.
              _image_content = _img_box.css('img').map do |_img|
                "<img src='#{_img.attr('data-original').to_s.gsub('!water', '')}'/>"
              end

              _content = _content + "<p>#{_image_content.join('')}</p>" if _image_content.any?
            end

            _entity_attr[:content] = _content

            _entity_attr[:tags] = entity_doc.css('.tag_box a').map do |_tag_doc|
              {
                :text => _tag_doc.text,
                :href => "#{_base_url}#{_tag_doc.attr('href')}"
              }
            end

            _author_doc = entity_doc.css('.para_info .user_info a').first

            _entity_attr[:authors] =
              if _author_doc
                [
                  {
                    :text => _author_doc.text.to_s.strip,
                    :href => "#{_base_url}#{_author_doc.attr('href').to_s.strip}"
                  }
                ]
              else
                []
              end

            _tool_doc   = entity_doc.css('.para_tool')
            _like_doc   = _tool_doc.css('a[report=like_joke] span').last
            _unlike_doc = _tool_doc.css('a[report=unlike_joke] span').last

            # Counters: strip non-word characters, then parse; a missing
            # node counts as 0 (what the comment count already yields).
            _entity_attr[:up_rating]      = _like_doc ? _like_doc.text.gsub(/\W/, '').to_i : 0
            _entity_attr[:down_rating]    = _unlike_doc ? _unlike_doc.text.gsub(/\W/, '').to_i : 0
            _entity_attr[:comments_count] = _tool_doc.css("#show_comment_count_#{_joke_id}").text.gsub(/\W/, '').to_i

            _entity = self.new_entity(_entity_attr)

            self.add_entity _entity if _entity.valid?
          end
        end

        def after_fetch_fail result
          puts "after_fetch fail: #{result}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module Xiaohuadi
      # A single article of xiaohuadi.com. Fetching the entity (its +url+)
      # re-reads title, body and category from the article page.
      class ArticleEntity
        include ::LoyalSpider::EntityAble
        include ::LoyalSpider::FetchAble

        self.config_loyal_spider_default_fetch_options(
          :encoding_type => 'GBK',
          :base_url      => 'http://www.xiaohuadi.com'
        )

        # Fills the attributes from the fetched article page.
        # TODO
        def after_fetch_success result
          entity_doc = result.response_html_doc.css('.listx')

          self.title = "#{entity_doc.css('.sonxltitle h1').text}"

          # A page without the article node yields empty content (the
          # entity then fails validation) instead of a NoMethodError on nil.
          _article_doc  = entity_doc.css('.sonxlarticle').first
          _text_content = _article_doc ? _article_doc.inner_html : ''

          # Every "<br>\r\n"-separated chunk becomes a sanitized paragraph.
          self.content = _text_content.split("<br>\r\n").map do |_cnt|
            "<p>#{Sanitize.clean _cnt}</p>"
          end.join('')

          # The last link of the position trail is used as the only tag.
          _category_doc = entity_doc.css('.sonxlPosition a').last

          self.tags =
            if _category_doc
              [
                {
                  :text => _category_doc.text,
                  :href => "#{self.base_url}#{_category_doc.attr('href')}"
                }
              ]
            else
              []
            end

          self.authors = []

          # -1 is the value this client stores for the three counters.
          self.up_rating      = -1
          self.down_rating    = -1
          self.comments_count = -1
        end
      end
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
|
3
|
+
module Clients
|
4
|
+
module Xiaohuadi
|
5
|
+
class ArticleEntityLister
|
6
|
+
include ::LoyalSpider::EntityListerAble
|
7
|
+
|
8
|
+
self.config_loyal_spider_entity_lister :url_format_first => 'http://www.xiaohuadi.com/%{category}/',
|
9
|
+
:url_format => 'http://www.xiaohuadi.com/%{category}/index_%{page}.html',
|
10
|
+
:entity_clazz => ::LoyalSpider::Clients::Xiaohuadi::ArticleEntity,
|
11
|
+
:fetch_options => {
|
12
|
+
:encoding_type => 'GBK',
|
13
|
+
:base_url => 'http://www.xiaohuadi.com'
|
14
|
+
}
|
15
|
+
|
16
|
+
# TODO:
|
17
|
+
def before_fetch options={}
|
18
|
+
puts "before_fetch: #{options}"
|
19
|
+
end
|
20
|
+
|
21
|
+
# TODO
|
22
|
+
def after_fetch_success result
|
23
|
+
# puts "after_fetch success: #{result}"
|
24
|
+
html_doc = result.response_html_doc
|
25
|
+
|
26
|
+
html_doc.css('.ilistxllist>ul').each do |entity_doc|
|
27
|
+
_entity_attr = {}
|
28
|
+
|
29
|
+
_fetch_options = result.fetch_options
|
30
|
+
_base_url = _fetch_options.base_url.to_s.strip
|
31
|
+
|
32
|
+
# :content # 正文
|
33
|
+
# :tags # 标签
|
34
|
+
# :tags_text # 标签
|
35
|
+
# :up_rating # 好评数
|
36
|
+
# :down_rating # 差评数目
|
37
|
+
# :comments_count # 评论数目
|
38
|
+
# :authors # 抓取的作者信息
|
39
|
+
|
40
|
+
_link_doc = entity_doc.css('.ilistxlctlB1 a')
|
41
|
+
|
42
|
+
_entity_attr[:url] = "#{_base_url}#{_link_doc.attr('href')}"
|
43
|
+
|
44
|
+
_entity_attr[:title] = "#{_link_doc.text}"
|
45
|
+
|
46
|
+
_text_content = entity_doc.css('.ilistxlctlB2').first.inner_html
|
47
|
+
|
48
|
+
_content = _text_content.split("<br>\r\n").map do |_cnt|
|
49
|
+
"<p>#{Sanitize.clean _cnt}</p>"
|
50
|
+
end.join('')
|
51
|
+
|
52
|
+
_entity_attr[:content] = _content
|
53
|
+
|
54
|
+
_category_doc = entity_doc.css('.ilistxlctlC table td a').last
|
55
|
+
|
56
|
+
if _category_doc
|
57
|
+
_entity_attr[:tags] = [
|
58
|
+
{
|
59
|
+
:text => _category_doc.text,
|
60
|
+
:href => "#{self.base_url}#{_category_doc.attr('href')}"
|
61
|
+
}
|
62
|
+
]
|
63
|
+
else
|
64
|
+
_entity_attr[:tags] = []
|
65
|
+
end
|
66
|
+
|
67
|
+
_entity_attr[:authors] = []
|
68
|
+
|
69
|
+
_tool_doc = entity_doc.css('.ilistxlctlA ul li')
|
70
|
+
|
71
|
+
_entity_attr[:up_rating] = _tool_doc[1].text.to_i
|
72
|
+
_entity_attr[:down_rating] = _tool_doc[2].text.to_i
|
73
|
+
_entity_attr[:comments_count] = _tool_doc[0].text.to_i
|
74
|
+
|
75
|
+
_entity = self.new_entity(_entity_attr)
|
76
|
+
|
77
|
+
if _entity.content.include?('未显示完,查看全文')
|
78
|
+
_entity.fetch
|
79
|
+
end
|
80
|
+
|
81
|
+
if _entity.valid?
|
82
|
+
self.add_entity _entity
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
# debugger
|
87
|
+
end
|
88
|
+
|
89
|
+
# Callback invoked when a fetch fails; prints the result to standard output.
#
# result - the failed fetch result (interpolated via its +to_s+).
def after_fetch_fail(result)
  message = "after_fetch fail: #{result}"
  puts message
end
|
92
|
+
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Configuration object for LoyalSpider; currently defines no settings.
  class Config
  end

  # Replaces the module-wide configuration object.
  def self.config=(value)
    @config = value
  end

  # Returns the module-wide configuration, creating a Config on first use.
  def self.config
    @config ||= Config.new
  end

  # Yields the module-wide configuration to the given block.
  def self.configure
    yield config
  end
end
|
20
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Helpers for normalising argument lists.
  class ArrayUtil
    class << self
      # Removes and returns the last element of +arr+ when it is a Hash.
      # Otherwise +arr+ is left untouched and a new empty Hash is returned.
      def extract_options!(arr)
        return {} unless arr.last.is_a?(Hash)

        arr.pop
      end

      # Coerces +args+ to an Array: an Array is returned as-is, nil becomes
      # an empty Array, and any other value is wrapped in a one-element Array.
      def init(args)
        return args if args.is_a?(Array)
        return [] if args.nil?

        [args]
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Recursive merge and duplication helpers for plain Hashes.
  class HashUtil
    # Merges +b_hash+ into +a_hash+ in place and returns +a_hash+.
    #
    # When both sides hold a Hash under the same key, the two nested hashes
    # are merged recursively into a new Hash; in every other case the value
    # from +b_hash+ is assigned as-is (not copied).
    def self.deep_merge!(a_hash, b_hash)
      b_hash.each_pair do |key, incoming|
        current = a_hash[key]

        a_hash[key] =
          if current.is_a?(Hash) && incoming.is_a?(Hash)
            # Merge the nested pair. Recursing with the whole +a_hash+ here
            # would embed a copy of the outer hash under +key+.
            deep_merge(current, incoming)
          else
            incoming
          end
      end

      a_hash
    end

    # Non-destructive variant of +deep_merge!+: merges +b_hash+ into a deep
    # copy of +a_hash+ and returns that copy.
    def self.deep_merge(a_hash, b_hash)
      deep_merge!(deep_dup(a_hash), b_hash)
    end

    # Returns a copy of +hash+ in which every nested Hash value has been
    # duplicated recursively. Non-Hash values are shared with the original.
    def self.deep_dup(hash)
      duplicate = hash.dup

      # Only existing keys are reassigned, so the hash is not resized while
      # it is being iterated.
      duplicate.each_pair do |key, value|
        duplicate[key] = deep_dup(value) if value.is_a?(Hash)
      end

      duplicate
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: loyal_spider
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- happy
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ">="
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rest-client
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: sanitize
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rails_config
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: nokogiri
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
description: Description of LoyalSpider.
|
111
|
+
email:
|
112
|
+
- andywang7259@gmail.com
|
113
|
+
executables: []
|
114
|
+
extensions: []
|
115
|
+
extra_rdoc_files: []
|
116
|
+
files:
|
117
|
+
- lib/loyal_spider/ables/fetch_options.rb
|
118
|
+
- lib/loyal_spider/ables/fetch_able.rb
|
119
|
+
- lib/loyal_spider/ables/fetch_result.rb
|
120
|
+
- lib/loyal_spider/ables/entity_lister_able.rb
|
121
|
+
- lib/loyal_spider/ables/entity_able.rb
|
122
|
+
- lib/loyal_spider/utils/hash_util.rb
|
123
|
+
- lib/loyal_spider/utils/array_util.rb
|
124
|
+
- lib/loyal_spider/config.rb
|
125
|
+
- lib/loyal_spider/utils.rb
|
126
|
+
- lib/loyal_spider/version.rb
|
127
|
+
- lib/loyal_spider/image.rb
|
128
|
+
- lib/loyal_spider/clients.rb
|
129
|
+
- lib/loyal_spider/clients/kuaile_mahua/article_entity.rb
|
130
|
+
- lib/loyal_spider/clients/kuaile_mahua/article_entity_lister.rb
|
131
|
+
- lib/loyal_spider/clients/xiaohuadi/article_entity.rb
|
132
|
+
- lib/loyal_spider/clients/xiaohuadi/article_entity_lister.rb
|
133
|
+
- lib/loyal_spider/clients/haha365/article_entity.rb
|
134
|
+
- lib/loyal_spider/clients/haha365/article_entity_lister.rb
|
135
|
+
- lib/loyal_spider/clients/lengxiaohua/article_entity.rb
|
136
|
+
- lib/loyal_spider/clients/lengxiaohua/article_entity_lister.rb
|
137
|
+
- lib/loyal_spider/ables.rb
|
138
|
+
- lib/loyal_spider.rb
|
139
|
+
- lib/tasks/loyal_spider_tasks.rake
|
140
|
+
- MIT-LICENSE
|
141
|
+
- Rakefile
|
142
|
+
- README.md
|
143
|
+
homepage: http://github.com/xiuxian123
|
144
|
+
licenses: []
|
145
|
+
post_install_message:
|
146
|
+
rdoc_options: []
|
147
|
+
require_paths:
|
148
|
+
- lib
|
149
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
150
|
+
none: false
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
segments:
|
156
|
+
- 0
|
157
|
+
hash: -2402893976680931226
|
158
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
159
|
+
none: false
|
160
|
+
requirements:
|
161
|
+
- - ">="
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0'
|
164
|
+
segments:
|
165
|
+
- 0
|
166
|
+
hash: -2402893976680931226
|
167
|
+
requirements: []
|
168
|
+
rubyforge_project:
|
169
|
+
rubygems_version: 1.8.25
|
170
|
+
signing_key:
|
171
|
+
specification_version: 3
|
172
|
+
summary: Summary of LoyalSpider.
|
173
|
+
test_files: []
|