loyal_spider 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README.md +4 -0
- data/Rakefile +21 -0
- data/lib/loyal_spider.rb +13 -0
- data/lib/loyal_spider/ables.rb +7 -0
- data/lib/loyal_spider/ables/entity_able.rb +86 -0
- data/lib/loyal_spider/ables/entity_lister_able.rb +119 -0
- data/lib/loyal_spider/ables/fetch_able.rb +130 -0
- data/lib/loyal_spider/ables/fetch_options.rb +38 -0
- data/lib/loyal_spider/ables/fetch_result.rb +34 -0
- data/lib/loyal_spider/clients.rb +13 -0
- data/lib/loyal_spider/clients/haha365/article_entity.rb +11 -0
- data/lib/loyal_spider/clients/haha365/article_entity_lister.rb +94 -0
- data/lib/loyal_spider/clients/kuaile_mahua/article_entity.rb +49 -0
- data/lib/loyal_spider/clients/kuaile_mahua/article_entity_lister.rb +85 -0
- data/lib/loyal_spider/clients/lengxiaohua/article_entity.rb +20 -0
- data/lib/loyal_spider/clients/lengxiaohua/article_entity_lister.rb +107 -0
- data/lib/loyal_spider/clients/xiaohuadi/article_entity.rb +66 -0
- data/lib/loyal_spider/clients/xiaohuadi/article_entity_lister.rb +96 -0
- data/lib/loyal_spider/config.rb +20 -0
- data/lib/loyal_spider/image.rb +1 -0
- data/lib/loyal_spider/utils.rb +4 -0
- data/lib/loyal_spider/utils/array_util.rb +16 -0
- data/lib/loyal_spider/utils/hash_util.rb +29 -0
- data/lib/loyal_spider/version.rb +4 -0
- data/lib/tasks/loyal_spider_tasks.rake +4 -0
- metadata +173 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2013 YOURNAME
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Load Bundler so the tasks below run against the bundle; print a hint and
# carry on when Bundler is not installed.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` writes the API documentation into ./rdoc.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'LoyalSpider'
  rdoc.options << '--line-numbers'
  # The package ships README.md (there is no README.rdoc), so include that.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Registers Bundler's gem packaging tasks.
Bundler::GemHelper.install_tasks
|
21
|
+
|
data/lib/loyal_spider.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require 'rest-client'
|
3
|
+
require 'sanitize'
|
4
|
+
require "rails_config"
|
5
|
+
require "nokogiri"
|
6
|
+
|
7
|
+
require "loyal_spider/config"
|
8
|
+
require "loyal_spider/ables"
|
9
|
+
require "loyal_spider/clients"
|
10
|
+
require "loyal_spider/utils"
|
11
|
+
|
12
|
+
module LoyalSpider
|
13
|
+
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require "#{File.dirname(__FILE__)}/ables/fetch_able"
|
3
|
+
require "#{File.dirname(__FILE__)}/ables/entity_able"
|
4
|
+
require "#{File.dirname(__FILE__)}/ables/entity_lister_able"
|
5
|
+
require "#{File.dirname(__FILE__)}/ables/fetch_options"
|
6
|
+
require "#{File.dirname(__FILE__)}/ables/fetch_result"
|
7
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Mixin for scraped entities (articles, jokes, ...): declares the common
  # attribute set and a small validation API built around an +errors+ hash.
  module EntityAble
    # Hook run on +include+: defines the attribute accessors on the including
    # class and mixes in InstanceMethods.
    #
    # +tags+, +authors+ and +content+ deliberately get only half of their
    # accessor pair here. Methods defined directly on the class shadow those
    # coming from an included module, so a full +attr_accessor+ would hide the
    # custom readers/writer defined in InstanceMethods.
    def self.included(base)
      base.class_eval do
        attr_accessor :title          # title
        attr_accessor :url            # URL of the entity
        attr_reader   :content        # body HTML (writer lives in InstanceMethods)
        attr_writer   :tags           # tags (reader lives in InstanceMethods)
        attr_accessor :up_rating      # number of up-votes
        attr_accessor :down_rating    # number of down-votes
        attr_accessor :comments_count # number of comments
        attr_writer   :authors        # scraped author info (reader lives in InstanceMethods)
        attr_accessor :publish_time   # publication time
        attr_accessor :errors         # validation errors: field => [messages]

        include InstanceMethods
      end
    end

    module InstanceMethods
      # Assigns every key of +attrs+ through the matching writer, then starts
      # with an empty +errors+ hash.
      #
      # @param attrs [Hash] attribute name => value
      def initialize(attrs = {})
        attrs.each do |key, value|
          send(:"#{key}=", value)
        end

        self.errors = {}
      end

      # Sets the body HTML and drops the cached image list derived from it.
      def content=(value)
        @images = nil
        @content = value
      end

      # @return [Array<Hash>] tag hashes; an empty array when none were assigned
      def tags
        @tags ||= []
      end

      # @return [Array] the +:text+ value of every tag
      def tags_array
        tags.map { |tag| tag[:text] }
      end

      # Images found in +content+, parsed with Nokogiri and cached until the
      # content is reassigned.
      #
      # @return [Array<Hash>] one hash per <img> with +:src+, +:title+, +:alt+
      def images
        @images ||= Nokogiri::HTML.parse(content.to_s).css('img').map do |img_doc|
          {
            :src   => img_doc.attr('src'),
            :title => img_doc.attr('title'),
            :alt   => img_doc.attr('alt')
          }
        end
      end

      # @return [Boolean] true when +content+ contains at least one image
      def images?
        images.any?
      end

      # @return [Array<Hash>] author hashes; an empty array when none were assigned
      def authors
        @authors ||= []
      end

      # @return [Array] the +:text+ value of every author
      def authors_array
        authors.map { |author| author[:text] }
      end

      # Runs the validations and reports whether they all passed.
      #
      # @return [Boolean]
      def valid?
        valid!
        errors.empty?
      end

      # Re-runs the validations from a clean slate, recording failures in
      # +errors+. Both +content+ and +url+ must be non-blank.
      def valid!
        # Start over on every run so repeated calls do not pile up duplicate
        # messages and a repaired entity can become valid again.
        self.errors = {}

        # The message text means "must not be blank".
        add_error :content, '不能为空' if content.to_s.strip.empty?
        add_error :url, '不能为空' if url.to_s.strip.empty?
      end

      private

      # Appends +message+ to the list of errors recorded for +field+.
      def add_error(field, message)
        (errors[field] ||= []) << message
      end
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
## -*- encoding : utf-8 -*-
|
3
|
+
module LoyalSpider
  # Mixin for classes that scrape a paginated listing and collect the
  # entities found on each page. Fetching itself comes from FetchAble, which
  # is included automatically.
  module EntityListerAble
    # Hook run on +include+: adds the +current_page+ writer, FetchAble, and
    # this module's instance and class methods to the including class.
    def self.included(base)
      base.class_eval do
        attr_writer :current_page # page number to fetch

        include ::LoyalSpider::FetchAble
        include InstanceMethods
        extend ClassMethods
      end
    end

    module ClassMethods
      # Configures the lister.
      #
      # @param options [Hash]
      #   - +:url_format+         format string for pages after the first
      #   - +:url_format_first+   format string for the first page
      #   - +:url_format_options+ default values for the format strings
      #   - +:entity_clazz+       class instantiated by +new_entity+
      #   - +:fetch_options+      forwarded to
      #     +config_loyal_spider_default_fetch_options+
      def config_loyal_spider_entity_lister(options = {})
        # Work on a copy so the caller's hash keeps its :fetch_options key.
        lister_options = options.dup
        fetch_options  = lister_options.delete(:fetch_options) || {}

        @entity_lister_options = lister_options

        config_loyal_spider_default_fetch_options(fetch_options)
      end

      # @return [Hash] the options given to +config_loyal_spider_entity_lister+
      def entity_lister_options
        @entity_lister_options ||= {}
      end

      # Fetches page +page+ with a fresh lister instance.
      #
      # @param page [Integer] page number
      # @param options [Hash] fetch options; +:url_format_options+ overrides
      #   the configured format values for this call
      # @return whatever the instance-level +paged_fetch+ returns
      def paged_fetch(page, options = {}, &block)
        new.paged_fetch(page, options, &block)
      end
    end

    module InstanceMethods
      # Fetches page +page+, merging +options[:url_format_options]+ into this
      # instance's format values first.
      def paged_fetch(page, options = {}, &block)
        url_format_options.merge!(options[:url_format_options] || {})

        self.current_page = page
        fetch(options, &block)
      end

      # @return [String, nil] format string for pages after the first
      def url_format
        @url_format ||= self.class.entity_lister_options[:url_format]
      end

      # @return [String, nil] format string for the first page
      def url_format_first
        @url_format_first ||= self.class.entity_lister_options[:url_format_first]
      end

      # Per-instance copy of the configured format values; copying keeps
      # +paged_fetch+'s +merge!+ from leaking into the class-level config.
      #
      # @return [Hash]
      def url_format_options
        @url_format_options ||= (self.class.entity_lister_options[:url_format_options] || {}).dup
      end

      # Internal pre-fetch hook: starts every fetch with an empty entity list.
      def _before_fetch(options = {})
        @entities = []
      end

      # Internal post-fetch hook: hands the collected entities to the result.
      def _after_fetch_success(result)
        result.entities = entities
      end

      # @return [Array] entities collected during the current fetch
      def entities
        @entities ||= []
      end

      # @return [Class, nil] the configured +:entity_clazz+
      def entity_clazz
        self.class.entity_lister_options[:entity_clazz]
      end

      # Builds an entity of the configured class.
      #
      # @return the new entity, or nil when no +:entity_clazz+ is configured
      def new_entity(attrs = {})
        clazz = entity_clazz
        clazz.new(attrs) if clazz
      end

      # Appends +entity+ to the collected entities.
      def add_entity(entity)
        entities << entity
      end

      # @return [Integer] page to fetch; 1 unless assigned
      def current_page
        @current_page ||= 1
      end

      # @return [Boolean] true for page numbers below 2
      def first_page?
        current_page < 2
      end

      # URL for the current page. Computed on every call so that reusing one
      # instance for several pages always yields the matching URL.
      #
      # @return [String]
      def fetch_url
        # Fall back to the general format when no first-page format is set.
        format_string = (first_page? && url_format_first) || url_format

        sprintf(format_string, url_format_options.merge(:page => current_page))
      end
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Mixin that lets an object download a URL through RestClient and wrap the
  # outcome in a FetchResult. The including class may define the hooks
  # +before_fetch+, +after_fetch_success+ and +after_fetch_fail+ (plus their
  # underscore-prefixed internal variants); each is called only if present.
  module FetchAble
    # Hook run on +include+: adds the +fetch_options+ accessor and this
    # module's instance and class methods to the including class.
    def self.included(base)
      base.class_eval do
        attr_accessor :fetch_options

        include InstanceMethods
        extend ClassMethods
      end
    end

    module ClassMethods
      # Sets the class-wide default fetch options.
      #
      # @param options [Hash] attributes understood by FetchOptions
      # @return [Hash] the stored options
      def config_loyal_spider_default_fetch_options(options = {})
        # Plain assignment: with ||= a configuration made after the reader
        # had already memoized {} would be silently ignored.
        @default_fetch_options = options
      end

      # @return [Hash] the class-wide default fetch options
      def default_fetch_options
        @default_fetch_options ||= {}
      end

      # Fetches with a fresh instance.
      #
      # @return [FetchResult]
      def fetch(options = {}, &block)
        new.fetch(options, &block)
      end
    end

    module InstanceMethods
      # @return the +base_url+ of the options used by the latest fetch
      def base_url
        fetch_options.base_url
      end

      # Performs the request and runs the hooks around it.
      #
      # @param options [Hash] overrides merged over the class defaults
      # @yield [FetchResult] the result, before the after-hooks run
      # @return [FetchResult]
      def fetch(options = {}, &block)
        _before_fetch(options) if respond_to?(:_before_fetch, true)
        before_fetch(options)  if respond_to?(:before_fetch, true)

        result = _perform_fetch(options, &block)

        if result.success?
          after_fetch_success(result)  if respond_to?(:after_fetch_success, true)
          _after_fetch_success(result) if respond_to?(:_after_fetch_success, true)
        else
          after_fetch_fail(result)  if respond_to?(:after_fetch_fail, true)
          _after_fetch_fail(result) if respond_to?(:_after_fetch_fail, true)
        end

        result
      end

      private

      # Executes the HTTP request. Any StandardError raised while doing so is
      # converted into a failed FetchResult instead of propagating.
      #
      # @return [FetchResult]
      def _perform_fetch(options = {}, &block)
        # Listers provide fetch_url; plain entities fall back to their url.
        target_url = respond_to?(:fetch_url, true) ? fetch_url : url

        @fetch_options = ::LoyalSpider::FetchOptions.new(
          ::LoyalSpider::HashUtil.deep_merge(self.class.default_fetch_options, options).merge(
            :url => target_url
          )
        )

        begin
          ::RestClient::Request.execute(@fetch_options.net_options) do |response, request, response_result, &_block|
            response_code = response.code

            fetched = ::LoyalSpider::FetchResult.new(
              :response_status => _response_status_for(response_code),
              :response_code   => response_code,
              # Re-tag the body with the page's declared encoding, then
              # transcode it to UTF-8.
              :response        => response.force_encoding(fetch_options.encoding_type).encode!('UTF-8'),
              :request         => request,
              :response_result => response_result,
              :fetch_options   => fetch_options,
              :url             => target_url
            )

            block.call(fetched, &_block) if block

            fetched
          end
        # StandardError only: signals and SystemExit must keep propagating.
        rescue StandardError => exception
          failed = ::LoyalSpider::FetchResult.new(_error_attributes(exception, target_url))

          # No inner block exists on this path, so only the result is passed.
          block.call(failed) if block

          failed
        end
      end

      # Maps an HTTP status code to the symbolic status stored in the result.
      def _response_status_for(response_code)
        case response_code
        when 200..207 then :success
        when 300..307 then :redirect
        when 400..450 then :request_error
        else :server_error
        end
      end

      # Builds the FetchResult attributes describing a failed request.
      def _error_attributes(exception, target_url)
        attributes = {
          :response        => nil,
          :request         => nil,
          :response_result => nil,
          :fetch_options   => fetch_options,
          :url             => target_url,
          :exception       => exception
        }

        case exception
        when ::SocketError
          attributes.merge!(:response_status => :socket_error, :response_code => 502)
        else
          attributes.merge!(:response_status => :request_error, :response_code => 400)
        end
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Value object holding the settings of one HTTP fetch.
  class FetchOptions
    attr_accessor :method        # HTTP method
    attr_accessor :open_timeout  # connection-open timeout
    attr_accessor :timeout       # request timeout
    attr_accessor :encoding_type # character encoding of the fetched page
    attr_accessor :headers       # request headers
    attr_accessor :url           # URL to fetch
    attr_accessor :base_url      # site root

    # @param attrs [Hash] any of +:url+, +:base_url+, +:method+, +:timeout+,
    #   +:open_timeout+, +:encoding_type+, +:headers+. The misspelled key
    #   +:open_time+ is still honoured for existing callers.
    def initialize(attrs = {})
      @url           = attrs[:url] || ''
      @base_url      = attrs[:base_url] || ''
      @method        = attrs[:method] || :get
      @timeout       = attrs[:timeout] || 60 # seconds
      # Read the key that matches the attribute name; fall back to the old
      # misspelling, then to the general timeout.
      @open_timeout  = attrs[:open_timeout] || attrs[:open_time] || @timeout
      @encoding_type = attrs[:encoding_type] || 'UTF-8'
      # Caller-supplied headers override these defaults key by key.
      @headers = {
        :accept_charset  => 'UTF-8,*;q=0.5',
        :accept_encoding => 'gzip,deflate,sdch',
        :user_agent      => 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17'
      }.merge(attrs[:headers] || {})
    end

    # @return [Hash] the settings as a single options hash
    def net_options
      {
        :url           => url,
        :method        => method,
        :timeout       => timeout,
        :open_timeout  => open_timeout,
        :encoding_type => encoding_type,
        :headers       => headers
      }
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Outcome of a single fetch: status, raw response and request details,
  # plus the entities a lister extracted from the page.
  class FetchResult
    attr_accessor :response_status, :response_code, :response, :request,
                  :response_result, :fetch_options, :url, :exception, :entities

    # @param attrs [Hash] attribute name => value, assigned through the
    #   matching writer
    def initialize(attrs = {})
      attrs.each_pair { |name, value| send(:"#{name}=", value) }
    end

    # @return [Boolean] true when the status is +:success+
    def success?
      response_status == :success
    end

    # @return [Boolean] the negation of +success?+
    def fail?
      !success?
    end

    # The response parsed as an HTML document, cached after the first call.
    #
    # @return the parsed document, or nil when there is no response
    def response_html_doc
      return unless response

      @response_html_doc ||= Nokogiri::HTML.parse(response)
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require "#{File.dirname(__FILE__)}/clients/kuaile_mahua/article_entity"
|
3
|
+
require "#{File.dirname(__FILE__)}/clients/kuaile_mahua/article_entity_lister"
|
4
|
+
|
5
|
+
require "#{File.dirname(__FILE__)}/clients/haha365/article_entity"
|
6
|
+
require "#{File.dirname(__FILE__)}/clients/haha365/article_entity_lister"
|
7
|
+
|
8
|
+
require "#{File.dirname(__FILE__)}/clients/lengxiaohua/article_entity"
|
9
|
+
require "#{File.dirname(__FILE__)}/clients/lengxiaohua/article_entity_lister"
|
10
|
+
|
11
|
+
require "#{File.dirname(__FILE__)}/clients/xiaohuadi/article_entity"
|
12
|
+
require "#{File.dirname(__FILE__)}/clients/xiaohuadi/article_entity_lister"
|
13
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module Haha365
      # Lister for the article index pages of www.haha365.com. The URL
      # formats expect a +:category+ value in the url_format_options.
      class ArticleEntityLister
        include ::LoyalSpider::EntityListerAble

        config_loyal_spider_entity_lister(
          :url_format_first => 'http://www.haha365.com/%{category}/',
          :url_format       => 'http://www.haha365.com/%{category}/index_%{page}.htm',
          :entity_clazz     => ::LoyalSpider::Clients::Haha365::ArticleEntity,
          :fetch_options    => {
            :encoding_type => 'GBK',
            :base_url      => 'http://www.haha365.com'
          }
        )

        # TODO: currently only prints the options.
        def before_fetch(options = {})
          puts "before_fetch: #{options}"
        end

        # Extracts one entity per listed article from the fetched page and
        # collects the valid ones.
        #
        # @param result [FetchResult] successful fetch result
        def after_fetch_success(result)
          listing_doc = result.response_html_doc.css('html #main .content .left .r_c')

          # Query once; titles, bodies and categories are matched up by
          # position in these three node sets.
          title_docs    = listing_doc.css('.cat_llb h3 a')
          content_docs  = listing_doc.css('.cat_llb #endtext')
          category_docs = listing_doc.css('.cat_llb .fl a')

          category_docs.each_with_index do |category_doc, index|
            title_doc   = title_docs[index]
            content_doc = content_docs[index]

            # Skip articles for which one of the three parts is missing.
            next if title_doc.nil? || content_doc.nil?

            # Each "<br>\r\n"-separated chunk becomes one sanitized paragraph.
            paragraphs = content_doc.inner_html.to_s.split("<br>\r\n").map do |chunk|
              "<p>#{Sanitize.clean(chunk).to_s.strip}</p>"
            end

            entity = new_entity(
              :content => paragraphs.join(''),
              :url     => "#{base_url}#{title_doc.attr('href')}",
              :title   => "#{title_doc.text}",
              :tags    => [
                {
                  :text => category_doc.text,
                  :href => "#{base_url}#{category_doc.attr('href')}"
                }
              ],
              :authors => [],
              # This listing sets all three counts to the placeholder -1.
              :up_rating      => -1,
              :down_rating    => -1,
              :comments_count => -1
            )

            add_entity(entity) if entity.valid?
          end
        end

        # Prints the failed result.
        def after_fetch_fail(result)
          puts "after_fetch fail: #{result}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module KuaileMahua
      # A single article of www.kl688.com. Fetching the entity's own URL
      # refreshes its attributes from the detail page.
      class ArticleEntity
        include ::LoyalSpider::EntityAble
        include ::LoyalSpider::FetchAble

        config_loyal_spider_default_fetch_options(
          :encoding_type => 'GBK',
          :base_url      => 'http://www.kl688.com'
        )

        # Replaces the entity's attributes with those found on the detail
        # page. Leaves the entity untouched when the article container is
        # not present in the page.
        #
        # @param result [FetchResult] successful fetch result
        def after_fetch_success(result)
          entity_doc = result.response_html_doc.css('.main .main-left .xiaohua .xiaohua-data')
          return if entity_doc.empty?

          heading_doc = entity_doc.css('h1').first
          self.title   = heading_doc.text.to_s.strip if heading_doc
          self.content = entity_doc.css('.content').inner_html

          # No separate tag-text attribute is assigned here: the entity has
          # no writer for one, and tags_array derives the texts from tags.
          self.tags = entity_doc.css('.link .tags h4 a').map do |tag_doc|
            {
              :text => tag_doc.text.to_s.strip,
              :href => "#{base_url}#{tag_doc.attr('href').to_s.strip}"
            }
          end

          self.authors = entity_doc.css('.link .tags .pusher a').map do |author_doc|
            {
              :text => author_doc.text.to_s.strip,
              :href => "#{author_doc.attr('href').to_s.strip}"
            }
          end

          self.up_rating   = entity_doc.css('.tools li a.good').text.to_i
          self.down_rating = entity_doc.css('.tools li a.bad').text.to_i

          comments_doc = entity_doc.css('.tools li s').first
          self.comments_count = comments_doc.text.to_i if comments_doc
        end
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module KuaileMahua
      # Lister for the joke index pages of www.kl688.com.
      class ArticleEntityLister
        include ::LoyalSpider::EntityListerAble

        config_loyal_spider_entity_lister(
          :url_format_first => 'http://www.kl688.com/',
          :url_format       => 'http://www.kl688.com/newjokes/index_%{page}.htm',
          :entity_clazz     => ::LoyalSpider::Clients::KuaileMahua::ArticleEntity,
          :fetch_options    => {
            :encoding_type => 'GBK',
            :base_url      => 'http://www.kl688.com'
          }
        )

        # TODO: currently only prints the options.
        def before_fetch(options = {})
          puts "before_fetch: #{options}"
        end

        # Builds one entity per listed joke and collects the valid ones.
        # Entries shown with a "more" link are completed by fetching the
        # entity itself.
        #
        # @param result [FetchResult] successful fetch result
        def after_fetch_success(result)
          site_root = result.fetch_options.base_url.to_s.strip

          result.response_html_doc.css('.main .main-left .xiaohua').each do |entity_doc|
            title_link = entity_doc.css('h3 a').first
            # Without a title link there is no URL; skip the entry instead of
            # letting one malformed item abort the whole page.
            next if title_link.nil?

            comments_doc = entity_doc.css('.tools li s').first

            entity = new_entity(
              :url     => "#{site_root}#{title_link.attr('href').to_s.strip}",
              :title   => title_link.text,
              :content => entity_doc.css('.content').inner_html,
              :tags    => entity_doc.css('.link .tags h4 a').map do |tag_doc|
                {
                  :text => tag_doc.text.to_s.strip,
                  :href => "#{site_root}#{tag_doc.attr('href').to_s.strip}"
                }
              end,
              :authors => entity_doc.css('.link .tags .pusher a').map do |author_doc|
                {
                  :text => author_doc.text.to_s.strip,
                  :href => "#{author_doc.attr('href').to_s.strip}"
                }
              end,
              :up_rating      => entity_doc.css('.tools li a.good').text.to_i,
              :down_rating    => entity_doc.css('.tools li a.bad').text.to_i,
              # nil.to_i is 0, so a missing counter yields 0.
              :comments_count => (comments_doc && comments_doc.text).to_i
            )

            # A "more" link marks an entry whose full text is on its own page.
            entity.fetch if entity_doc.css('.content .more a').any?

            add_entity(entity) if entity.valid?
          end
        end

        # Prints the failed result.
        def after_fetch_fail(result)
          puts "after_fetch fail: #{result}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module Lengxiaohua
      # A single joke of lengxiaohua.com. Only the entity behaviour is mixed
      # in; the fetch mixin and its configuration (encoding UTF-8, base URL
      # http://lengxiaohua.com) are currently disabled.
      class ArticleEntity
        include ::LoyalSpider::EntityAble

        # Validation is delegated unchanged to the mixed-in implementation.
        def valid?
          super()
        end
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module Lengxiaohua
      # Lister for the joke index pages of lengxiaohua.com.
      class ArticleEntityLister
        include ::LoyalSpider::EntityListerAble

        config_loyal_spider_entity_lister(
          :url_format_first => 'http://lengxiaohua.com/',
          :url_format       => 'http://lengxiaohua.com/?page_num=%{page}',
          :entity_clazz     => ::LoyalSpider::Clients::Lengxiaohua::ArticleEntity,
          :fetch_options    => {
            :encoding_type => 'UTF-8',
            :base_url      => 'http://lengxiaohua.com'
          }
        )

        # TODO: currently only prints the options.
        def before_fetch(options = {})
          puts "before_fetch: #{options}"
        end

        # Builds one entity per listed joke and collects the valid ones.
        #
        # @param result [FetchResult] successful fetch result
        def after_fetch_success(result)
          site_root = result.fetch_options.base_url.to_s.strip

          result.response_html_doc.css('.joke_wrap li.joke_li').each do |entity_doc|
            id_link  = entity_doc.css('.para_info .para_tool a').first
            text_doc = entity_doc.css('.para_can pre').first
            # The joke id and text are both required; skip incomplete entries
            # instead of letting one of them abort the whole page.
            next if id_link.nil? || text_doc.nil?

            joke_id  = id_link.attr('jokeid')
            tool_doc = entity_doc.css('.para_tool')

            entity = new_entity(
              :url     => "#{site_root}/joke/#{joke_id}",
              :title   => '',
              :content => _content_html(text_doc, entity_doc.css('.default_load_imgbox').first),
              :tags    => entity_doc.css('.tag_box a').map do |tag_doc|
                {
                  :text => tag_doc.text,
                  :href => "#{site_root}#{tag_doc.attr('href')}"
                }
              end,
              :authors        => _authors_from(entity_doc.css('.para_info .user_info a').first, site_root),
              :up_rating      => _count_from(tool_doc.css('a[report=like_joke] span').last),
              :down_rating    => _count_from(tool_doc.css('a[report=unlike_joke] span').last),
              :comments_count => tool_doc.css("#show_comment_count_#{joke_id}").text.gsub(/\W/, '').to_i
            )

            add_entity(entity) if entity.valid?
          end
        end

        # Prints the failed result.
        def after_fetch_fail(result)
          puts "after_fetch fail: #{result}"
        end

        private

        # Turns the joke text into one paragraph per line and appends the
        # images of +img_box+ (if any) as a final paragraph.
        def _content_html(text_doc, img_box)
          html = Sanitize.clean(text_doc.inner_html).split(/\n/).map do |line|
            "<p>#{line}</p>"
          end.join('')

          return html if img_box.nil?

          # NOTE(review): the data-original value is interpolated into the
          # markup unescaped; confirm the attribute can be trusted.
          image_tags = img_box.css('img').map do |img|
            "<img src='#{img.attr('data-original').to_s.gsub('!water', '')}'/>"
          end

          image_tags.any? ? html + "<p>#{image_tags.join('')}</p>" : html
        end

        # Author list for the entity: one hash when an author link exists,
        # otherwise an empty array.
        def _authors_from(author_doc, site_root)
          return [] if author_doc.nil?

          [
            {
              :text => author_doc.text.to_s.strip,
              :href => "#{site_root}#{author_doc.attr('href').to_s.strip}"
            }
          ]
        end

        # Reads a counter from +node+, ignoring non-word characters. Returns
        # -1 when the node is missing, the same placeholder the Haha365
        # lister assigns to its counts.
        def _count_from(node)
          node ? node.text.gsub(/\W/, '').to_i : -1
        end
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  module Clients
    module Xiaohuadi
      # A single article of www.xiaohuadi.com. Fetching the entity's own URL
      # refreshes its attributes from the detail page.
      class ArticleEntity
        include ::LoyalSpider::EntityAble
        include ::LoyalSpider::FetchAble

        config_loyal_spider_default_fetch_options(
          :encoding_type => 'GBK',
          :base_url      => 'http://www.xiaohuadi.com'
        )

        # Replaces title, content, tags and authors with the values from the
        # detail page. Leaves the entity untouched when the page has no
        # article body.
        #
        # @param result [FetchResult] successful fetch result
        def after_fetch_success(result)
          entity_doc  = result.response_html_doc.css('.listx')
          article_doc = entity_doc.css('.sonxlarticle').first
          return if article_doc.nil?

          self.title = "#{entity_doc.css('.sonxltitle h1').text}"

          # Each "<br>\r\n"-separated chunk becomes one sanitized paragraph.
          self.content = article_doc.inner_html.split("<br>\r\n").map do |chunk|
            "<p>#{Sanitize.clean(chunk)}</p>"
          end.join('')

          category_doc = entity_doc.css('.sonxlPosition a').last

          self.tags =
            if category_doc
              [
                {
                  :text => category_doc.text,
                  :href => "#{base_url}#{category_doc.attr('href')}"
                }
              ]
            else
              []
            end

          self.authors = []

          # Counts are not read from this page. Keep values that were already
          # assigned (e.g. from the listing) and use -1 only as a fallback.
          self.up_rating      ||= -1
          self.down_rating    ||= -1
          self.comments_count ||= -1
        end
      end
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
|
3
|
+
module Clients
|
4
|
+
module Xiaohuadi
|
5
|
+
class ArticleEntityLister
|
6
|
+
include ::LoyalSpider::EntityListerAble
|
7
|
+
|
8
|
+
self.config_loyal_spider_entity_lister :url_format_first => 'http://www.xiaohuadi.com/%{category}/',
|
9
|
+
:url_format => 'http://www.xiaohuadi.com/%{category}/index_%{page}.html',
|
10
|
+
:entity_clazz => ::LoyalSpider::Clients::Xiaohuadi::ArticleEntity,
|
11
|
+
:fetch_options => {
|
12
|
+
:encoding_type => 'GBK',
|
13
|
+
:base_url => 'http://www.xiaohuadi.com'
|
14
|
+
}
|
15
|
+
|
16
|
+
# TODO:
|
17
|
+
def before_fetch options={}
|
18
|
+
puts "before_fetch: #{options}"
|
19
|
+
end
|
20
|
+
|
21
|
+
# TODO
|
22
|
+
def after_fetch_success result
|
23
|
+
# puts "after_fetch success: #{result}"
|
24
|
+
html_doc = result.response_html_doc
|
25
|
+
|
26
|
+
html_doc.css('.ilistxllist>ul').each do |entity_doc|
|
27
|
+
_entity_attr = {}
|
28
|
+
|
29
|
+
_fetch_options = result.fetch_options
|
30
|
+
_base_url = _fetch_options.base_url.to_s.strip
|
31
|
+
|
32
|
+
# :content # 正文
|
33
|
+
# :tags # 标签
|
34
|
+
# :tags_text # 标签
|
35
|
+
# :up_rating # 好评数
|
36
|
+
# :down_rating # 差评数目
|
37
|
+
# :comments_count # 评论数目
|
38
|
+
# :authors # 抓取的作者信息
|
39
|
+
|
40
|
+
_link_doc = entity_doc.css('.ilistxlctlB1 a')
|
41
|
+
|
42
|
+
_entity_attr[:url] = "#{_base_url}#{_link_doc.attr('href')}"
|
43
|
+
|
44
|
+
_entity_attr[:title] = "#{_link_doc.text}"
|
45
|
+
|
46
|
+
_text_content = entity_doc.css('.ilistxlctlB2').first.inner_html
|
47
|
+
|
48
|
+
_content = _text_content.split("<br>\r\n").map do |_cnt|
|
49
|
+
"<p>#{Sanitize.clean _cnt}</p>"
|
50
|
+
end.join('')
|
51
|
+
|
52
|
+
_entity_attr[:content] = _content
|
53
|
+
|
54
|
+
_category_doc = entity_doc.css('.ilistxlctlC table td a').last
|
55
|
+
|
56
|
+
if _category_doc
|
57
|
+
_entity_attr[:tags] = [
|
58
|
+
{
|
59
|
+
:text => _category_doc.text,
|
60
|
+
:href => "#{self.base_url}#{_category_doc.attr('href')}"
|
61
|
+
}
|
62
|
+
]
|
63
|
+
else
|
64
|
+
_entity_attr[:tags] = []
|
65
|
+
end
|
66
|
+
|
67
|
+
_entity_attr[:authors] = []
|
68
|
+
|
69
|
+
_tool_doc = entity_doc.css('.ilistxlctlA ul li')
|
70
|
+
|
71
|
+
_entity_attr[:up_rating] = _tool_doc[1].text.to_i
|
72
|
+
_entity_attr[:down_rating] = _tool_doc[2].text.to_i
|
73
|
+
_entity_attr[:comments_count] = _tool_doc[0].text.to_i
|
74
|
+
|
75
|
+
_entity = self.new_entity(_entity_attr)
|
76
|
+
|
77
|
+
if _entity.content.include?('未显示完,查看全文')
|
78
|
+
_entity.fetch
|
79
|
+
end
|
80
|
+
|
81
|
+
if _entity.valid?
|
82
|
+
self.add_entity _entity
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
# debugger
|
87
|
+
end
|
88
|
+
|
89
|
+
# Reports a failed fetch by printing the interpolated result to standard
# output.
#
# result - the failed fetch result (interpolated via to_s).
def after_fetch_fail(result)
  failure_notice = "after_fetch fail: #{result}"
  puts failure_notice
end
|
92
|
+
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Settings container for the library; it declares no options of its own.
  class Config
  end

  # Replaces the module-wide configuration object.
  def self.config=(value)
    @config = value
  end

  # Returns the module-wide configuration, creating a Config the first time
  # it is needed (or again after it has been reset to nil/false).
  def self.config
    @config = Config.new unless @config
    @config
  end

  # Yields the module-wide configuration to the given block and returns the
  # block's result.
  def self.configure
    yield(config)
  end
end
|
20
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Helpers for working with argument lists.
  class ArrayUtil
    class << self
      # Removes and returns a trailing Hash from +arr+ (mutating +arr+).
      # Returns a new empty Hash, leaving +arr+ untouched, when the last
      # element is not a Hash.
      def extract_options!(arr)
        return {} unless arr.last.is_a?(Hash)

        arr.pop
      end

      # Normalizes +args+ to an Array: an Array is returned as-is, nil
      # becomes an empty Array, and any other object is wrapped in a
      # one-element Array.
      def init(args)
        return args if args.is_a?(Array)
        return [] if args.nil?

        [args]
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module LoyalSpider
  # Recursive merge and copy helpers for nested Hashes.
  class HashUtil
    # Recursively merges +b_hash+ into +a_hash+, modifying +a_hash+ in place.
    #
    # When both sides hold a Hash under the same key, the two nested Hashes
    # are merged (into a fresh copy of the nested receiver-side Hash);
    # otherwise the value from +b_hash+ wins.
    #
    # Returns +a_hash+.
    def self.deep_merge!(a_hash, b_hash)
      b_hash.each_pair do |key, incoming|
        existing = a_hash[key]

        a_hash[key] =
          if existing.is_a?(Hash) && incoming.is_a?(Hash)
            # Recurse on the nested value stored under this key, not on the
            # whole receiver; recursing on the receiver would nest a copy of
            # a_hash inside itself.
            deep_merge(existing, incoming)
          else
            incoming
          end
      end

      a_hash
    end

    # Non-destructive variant of deep_merge!: merges +b_hash+ into a deep
    # copy of +a_hash+ and returns that copy, leaving +a_hash+ untouched.
    def self.deep_merge(a_hash, b_hash)
      deep_merge!(deep_dup(a_hash), b_hash)
    end

    # Returns a copy of +hash+ in which every nested Hash value (at any
    # depth) is duplicated as well. Non-Hash values are shared with the
    # original.
    def self.deep_dup(hash)
      hash.each_with_object(hash.dup) do |(key, value), copy|
        copy[key] = deep_dup(value) if value.is_a?(Hash)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: loyal_spider
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- happy
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ">="
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rest-client
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: sanitize
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rails_config
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: nokogiri
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
description: Description of LoyalSpider.
|
111
|
+
email:
|
112
|
+
- andywang7259@gmail.com
|
113
|
+
executables: []
|
114
|
+
extensions: []
|
115
|
+
extra_rdoc_files: []
|
116
|
+
files:
|
117
|
+
- lib/loyal_spider/ables/fetch_options.rb
|
118
|
+
- lib/loyal_spider/ables/fetch_able.rb
|
119
|
+
- lib/loyal_spider/ables/fetch_result.rb
|
120
|
+
- lib/loyal_spider/ables/entity_lister_able.rb
|
121
|
+
- lib/loyal_spider/ables/entity_able.rb
|
122
|
+
- lib/loyal_spider/utils/hash_util.rb
|
123
|
+
- lib/loyal_spider/utils/array_util.rb
|
124
|
+
- lib/loyal_spider/config.rb
|
125
|
+
- lib/loyal_spider/utils.rb
|
126
|
+
- lib/loyal_spider/version.rb
|
127
|
+
- lib/loyal_spider/image.rb
|
128
|
+
- lib/loyal_spider/clients.rb
|
129
|
+
- lib/loyal_spider/clients/kuaile_mahua/article_entity.rb
|
130
|
+
- lib/loyal_spider/clients/kuaile_mahua/article_entity_lister.rb
|
131
|
+
- lib/loyal_spider/clients/xiaohuadi/article_entity.rb
|
132
|
+
- lib/loyal_spider/clients/xiaohuadi/article_entity_lister.rb
|
133
|
+
- lib/loyal_spider/clients/haha365/article_entity.rb
|
134
|
+
- lib/loyal_spider/clients/haha365/article_entity_lister.rb
|
135
|
+
- lib/loyal_spider/clients/lengxiaohua/article_entity.rb
|
136
|
+
- lib/loyal_spider/clients/lengxiaohua/article_entity_lister.rb
|
137
|
+
- lib/loyal_spider/ables.rb
|
138
|
+
- lib/loyal_spider.rb
|
139
|
+
- lib/tasks/loyal_spider_tasks.rake
|
140
|
+
- MIT-LICENSE
|
141
|
+
- Rakefile
|
142
|
+
- README.md
|
143
|
+
homepage: http://github.com/xiuxian123
|
144
|
+
licenses: []
|
145
|
+
post_install_message:
|
146
|
+
rdoc_options: []
|
147
|
+
require_paths:
|
148
|
+
- lib
|
149
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
150
|
+
none: false
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
segments:
|
156
|
+
- 0
|
157
|
+
hash: -2402893976680931226
|
158
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
159
|
+
none: false
|
160
|
+
requirements:
|
161
|
+
- - ">="
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0'
|
164
|
+
segments:
|
165
|
+
- 0
|
166
|
+
hash: -2402893976680931226
|
167
|
+
requirements: []
|
168
|
+
rubyforge_project:
|
169
|
+
rubygems_version: 1.8.25
|
170
|
+
signing_key:
|
171
|
+
specification_version: 3
|
172
|
+
summary: Summary of LoyalSpider.
|
173
|
+
test_files: []
|