spider_bot 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.yardopts +2 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +148 -0
- data/Rakefile +2 -0
- data/bin/spider +12 -0
- data/lib/spider_bot/base.rb +31 -0
- data/lib/spider_bot/cli.rb +183 -0
- data/lib/spider_bot/crawl.rb +235 -0
- data/lib/spider_bot/error.rb +5 -0
- data/lib/spider_bot/http/client.rb +166 -0
- data/lib/spider_bot/http/response.rb +83 -0
- data/lib/spider_bot/load.rb +30 -0
- data/lib/spider_bot/logging.rb +21 -0
- data/lib/spider_bot/railte.rb +6 -0
- data/lib/spider_bot/string/date.yml +29 -0
- data/lib/spider_bot/string/time.rb +119 -0
- data/lib/spider_bot/version.rb +3 -0
- data/lib/spider_bot.rb +37 -0
- data/spider_bot.gemspec +32 -0
- metadata +206 -0
@@ -0,0 +1,166 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
|
3
|
+
module SpiderBot
|
4
|
+
module Http
|
5
|
+
class Client
|
6
|
+
|
7
|
+
# return url for HttpClient
|
8
|
+
attr_reader :url
|
9
|
+
|
10
|
+
# return http user_agent for HttpClient
|
11
|
+
attr_reader :user_agent
|
12
|
+
|
13
|
+
attr_reader :headers
|
14
|
+
|
15
|
+
#
|
16
|
+
attr_accessor :options
|
17
|
+
|
18
|
+
# return connection for HttpClient
|
19
|
+
attr_accessor :connection
|
20
|
+
|
21
|
+
attr_accessor :conn_build
|
22
|
+
|
23
|
+
# Supported User-Agent
|
24
|
+
#
|
25
|
+
# * Linux Firefox (3.6.1)
|
26
|
+
# * Linux Konqueror (3)
|
27
|
+
# * Linux Mozilla
|
28
|
+
# * Linux Chrome
|
29
|
+
# * Mac Firefox
|
30
|
+
# * Mac Mozilla
|
31
|
+
# * Mac Chrome
|
32
|
+
# * Mac Safari
|
33
|
+
# * bot (default)
|
34
|
+
# * Windows IE 6
|
35
|
+
# * Windows IE 7
|
36
|
+
# * Windows IE 8
|
37
|
+
# * Windows IE 9
|
38
|
+
# * Windows Mozilla
|
39
|
+
# * iPhone (3.0)
|
40
|
+
# * iPad
|
41
|
+
# * Android
|
42
|
+
|
43
|
+
USER_AGENT = {
|
44
|
+
'bot' => "bot/#{SpiderBot::VERSION}",
|
45
|
+
'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
|
46
|
+
'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
47
|
+
'Linux Chrome' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624 Chrome/26.0.1410.43',
|
48
|
+
'Mac Firefox' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) Gecko/20100101 Firefox/35.0',
|
49
|
+
'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/600.3.18 (KHTML, like Gecko) Version/8.0.3 Safari/600.3.18',
|
50
|
+
'Mac Chrome' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36',
|
51
|
+
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
|
52
|
+
'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
53
|
+
'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
54
|
+
'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
|
55
|
+
'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
|
56
|
+
'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
|
57
|
+
'iPad' => 'Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
|
58
|
+
'Android' => 'Mozilla/5.0 (Linux; U; Android 3.0; en-us) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
|
59
|
+
}
|
60
|
+
|
61
|
+
# Initialize a new HttpClient
#
# @param uri [String, nil] the base url handed to Faraday by #connection
# @param options [Hash, nil] stored as-is in @options; not interpreted here
# @yield [client] the new client, so url / user_agent / headers can be set
#
# @example
#   http = HttpClient.new
#
#   http = HttpClient.new do |http|
#     http.user_agent = "Mac Safari"
#     http.url = "http://example.com"
#   end
def initialize(uri = nil, options = nil, &block)
  @url = uri
  @options = options
  @user_agent ||= USER_AGENT['bot']
  yield self if block_given?
  # Build the default headers only after the block ran, so a user agent chosen
  # inside the block is the one that is sent. Without this, @headers stayed nil
  # and the configured User-Agent never reached the request.
  @headers ||= {"User-Agent" => @user_agent}
end
|
82
|
+
|
83
|
+
# Remember a block that customises the Faraday builder.
#
# The block is only stored in @conn_build here; #connection calls it with the
# builder object when the connection is first created.
#
# @yield [builder] the builder passed in by #connection
# @return [Proc, nil] the stored block (nil when called without a block)
def builder(&block)
|
84
|
+
@conn_build = block
|
85
|
+
end
|
86
|
+
|
87
|
+
# Set the url for HttpClient
#
# Also drops the memoised Faraday connection so the next call to #connection
# builds a fresh one for the new url.
#
# @param uri [String] the HttpClient url
# @return [String] the url that was assigned
def url=(uri)
  # #connection memoises in @connection; the previous code cleared @conn,
  # which nothing reads, so the old connection kept being reused.
  @connection = nil
  @url = uri
end
|
95
|
+
|
96
|
+
# Set the headers for HttpClient
#
# The current #user_agent is always merged in under "User-Agent", replacing
# any value the caller supplied for that exact key.
#
# @param headers [Hash, nil] request headers; nil is treated as an empty Hash
# @return [Hash] the stored headers including "User-Agent"
def headers=(headers)
  @headers = (headers || {}).merge("User-Agent" => user_agent)
end
|
104
|
+
|
105
|
+
# Set the user agent for HttpClient
#
# @param name [String, Symbol] a key of USER_AGENT (e.g. "Mac Safari");
#   unknown names fall back to the 'bot' entry
# @return [String] the resolved User-Agent string
def user_agent=(name)
  # USER_AGENT is keyed by String; to_s lets the documented Symbol form work.
  @user_agent = USER_AGENT[name.to_s] || USER_AGENT['bot']
  # Keep the outgoing header in step, otherwise a user agent chosen after
  # headers were assigned (or without assigning headers at all) is never sent.
  @headers = (@headers || {}).merge("User-Agent" => @user_agent)
  @user_agent
end
|
112
|
+
|
113
|
+
# The Faraday connection object
|
114
|
+
# @return [connection] The Faraday connection builder
|
115
|
+
|
116
|
+
# Return the Faraday connection for #url, creating it on first use.
#
# The connection is memoised in @connection, so later calls return the same
# object. When a block was registered through #builder (stored in conn_build),
# it is called with the builder yielded by Faraday's #build.
#
# @return [Object] the value returned by Faraday.new(url: url)
def connection
|
117
|
+
@connection ||= begin
|
118
|
+
conn = Faraday.new(url: url)
|
119
|
+
conn.build do |b|
|
120
|
+
conn_build.call(b)
|
121
|
+
end if conn_build
|
122
|
+
conn
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
|
127
|
+
# Make request with HttpClient
#
# For :get the query is encoded into the URL; for every other verb it is sent
# as the request body. Redirects (301, 302, 303, 307) are followed with the
# same verb and query.
#
# @param verb [Symbol] verb one of :get, :post, :put, :delete
# @param uri [String] URL path for request
# @param query [Hash] additional query parameters for the URL of the request
# @param redirect_limit [Integer] how many redirects may still be followed
# @yield [request] the request object, for every request including redirects
# @return [Response, nil] the wrapped response for 2xx/3xx statuses; nil for
#   any other status
def request(verb, uri, query = {}, redirect_limit = 5, &block)
  query_get, query_post = verb == :get ? [query, nil] : [nil, query]
  target = connection.build_url(uri, query_get)

  raw = connection.run_request(verb, target, query_post, headers) do |req|
    block.call(req) if block
  end
  response = Response.new(raw)

  case response.status
  when 301, 302, 303, 307
    location = response.headers['location']
    # Hand back the redirect response itself instead of recursing forever
    # (redirect loops) or re-requesting with a nil URI (missing Location).
    return response if location.nil? || redirect_limit <= 0

    request(verb, location, query, redirect_limit - 1, &block)
  when 200..299, 300..399
    response
  end
end
|
149
|
+
|
150
|
+
# Handle get request with HttpClient
|
151
|
+
#
|
152
|
+
# @param uri [String] URL path for request
|
153
|
+
# @param query [Hash] additional query parameters for the URL of the request
|
154
|
+
|
155
|
+
# Issue a GET through #request.
#
# @yield [request] forwarded unchanged to #request
# @return [Object, nil] whatever #request returns for verb :get
def get(uri, query = {}, &block)
|
156
|
+
request(:get, uri, query, &block)
|
157
|
+
end
|
158
|
+
|
159
|
+
# Handle post request with HttpClient
|
160
|
+
# @param (see #get)
|
161
|
+
# Issue a POST through #request; #request sends the query as the request body
# for every verb other than :get.
#
# @yield [request] forwarded unchanged to #request
# @return [Object, nil] whatever #request returns for verb :post
def post(uri, query = {}, &block)
|
162
|
+
request(:post, uri, query, &block)
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module SpiderBot
|
2
|
+
module Http
|
3
|
+
class Response
|
4
|
+
attr_reader :response
|
5
|
+
|
6
|
+
CONTENT_TYPE = {
|
7
|
+
'application/json' => :json,
|
8
|
+
'application/x-www-form-urlencoded' => :html,
|
9
|
+
'text/html' => :html,
|
10
|
+
'text/javascript' => :json,
|
11
|
+
'text/xml' => :xml
|
12
|
+
}
|
13
|
+
|
14
|
+
PARSERS = {
|
15
|
+
:json => lambda{ |body| MultiJson.respond_to?(:adapter) ? MultiJson.load(body) : MultiJson.decode(body) rescue body},
|
16
|
+
:html => lambda{ |body| Nokogiri::HTML(body)},
|
17
|
+
:xml => lambda{ |body| MultiXml.parse(body) }
|
18
|
+
}
|
19
|
+
|
20
|
+
# Wrap a raw HTTP response.
#
# @param response [Object] stored in @response; the other methods of this
#   class call #headers, #body and #status on it
def initialize(response)
|
21
|
+
@response = response
|
22
|
+
end
|
23
|
+
|
24
|
+
# Headers of the wrapped response, returned without modification.
#
# @return [Object] the value of response.headers
def headers
|
25
|
+
response.headers
|
26
|
+
end
|
27
|
+
|
28
|
+
# Body of the wrapped response after charset handling by #decode.
#
# @param options [Hash, nil] forwarded to #decode; nil is replaced by {}
# @return [String] the result of #decode
def body(options = {})
|
29
|
+
options = options || {}
|
30
|
+
decode(response.body, options)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Convert a response body to UTF-8 based on the charset it declares.
#
# The body is returned untouched when it is treated as JSON (see #json?),
# when it declares utf-8, or when no source encoding can be determined.
# Conversion is best effort: if transcoding raises, the body is returned as is.
#
# @param body [String, nil] raw body; nil yields ''
# @param options [Hash, nil] :encode names the source encoding to use instead
#   of the declared charset
# @return [String] the (possibly transcoded, in place) body
def decode(body, options = {})
  return '' if !body
  return body if json?

  options ||= {}
  declared = body.match(/charset\s*=[\s|\W]*([\w-]+)/)
  return body if declared && declared[1].downcase == "utf-8"

  # A page without any charset declaration used to raise NoMethodError on nil.
  source = options[:encode] || (declared && charset_covert(declared[1]))
  return body unless source

  begin
    # Keyword form: a positional options Hash is rejected by String#encode!
    # on Ruby 3, which the rescue below would silently swallow.
    body.encode!("utf-8", source, invalid: :replace)
  rescue
    body
  end
end
|
48
|
+
|
49
|
+
# Status of the wrapped response, returned without modification.
#
# @return [Object] the value of response.status
def status
|
50
|
+
response.status
|
51
|
+
end
|
52
|
+
|
53
|
+
# Attempts to determine the content type of the response.
#
# Reads the 'content-type' or 'Content-Type' header (first one present),
# drops everything from the first ';' on and strips surrounding whitespace.
#
# @return [String] the bare media type, or '' when neither header is set
|
54
|
+
def content_type
|
55
|
+
((response.headers.values_at('content-type', 'Content-Type').compact.first || '').split(';').first || '').strip
|
56
|
+
end
|
57
|
+
|
58
|
+
# Whether the response should be treated as JSON rather than HTML.
#
# True when the content type maps to :json in CONTENT_TYPE, or when the raw
# body does not contain the literal text "<html".
#
# NOTE(review): the "<html" test is case-sensitive and calls #match on the raw
# body, so a nil body would raise here - confirm callers guard against that.
#
# @return [Boolean]
def json?
|
59
|
+
CONTENT_TYPE[content_type] == :json || !response.body.match(/\<html/)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Pick the PARSERS key to use for this response.
#
# The content type decides (via CONTENT_TYPE); unknown types fall back to
# :html. A response labelled as HTML whose body starts with "{" or "["
# (ignoring leading whitespace) is handled as :json.
#
# The previous check used /\<.*html|/, whose empty alternative matches every
# string, so the JSON fallback could never trigger. Sniffing for a JSON opener
# restores the fallback without sending HTML fragments to the JSON parser.
#
# @return [Symbol] :json, :html or :xml
def parser
  type = CONTENT_TYPE[content_type]
  type = :json if type == :html && response.body.to_s =~ /\A\s*[\[{]/
  type.nil? ? :html : type
end
|
68
|
+
|
69
|
+
# Parse #body with the PARSERS entry selected by #parser.
#
# The result is memoised in @parsed.
#
# @return [Object] whatever the selected parser lambda returns
def parsed
|
70
|
+
@parsed ||= PARSERS[parser].call(body)
|
71
|
+
end
|
72
|
+
|
73
|
+
# Map a declared charset name to the encoding name used for transcoding.
#
# gb2312 and gbk (in any letter case) are both read as "gbk"; every other
# name is returned unchanged.
#
# @param charset [String] charset name as declared by the page
# @return [String] encoding name to pass to String#encode!
def charset_covert(charset)
  # Compare case-insensitively so spellings such as "Gb2312" are covered too.
  %w[gb2312 gbk].include?(charset.to_s.downcase) ? "gbk" : charset
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Boot the host application from the current working directory.
#
# config/application is required first; if that raises LoadError,
# config/boot.rb is required instead when it exists.
begin
  require File.expand_path("./config/application")
rescue LoadError
  system_boot = File.expand_path("./config/boot.rb")
  require system_boot if File.exist?(system_boot)
end

# Padrino host: collect the bot files and, outside development, log to
# log/spider.log under the Padrino root.
if defined?(Padrino)
  puts "read padrino environment #{Padrino.env}"
  BOTDIR = Dir.glob("#{Padrino.root}/app/bots/**/*_bot.rb")
  if Padrino.env != :development
    SpiderBot::Logging.initialize_logger("#{Padrino.root}/log/spider.log")
  end
end

# Rails host: turn off eager loading, initialize the application, collect the
# bot files and, outside development, log to log/spider.log under the Rails root.
if defined?(Rails)
  class Railtie < Rails::Railtie
    initializer "disable eager load" do |app|
      app.config.eager_load = false
    end
  end
  Rails.application.initialize!
  puts "read rails environment #{Rails.env}"
  BOTDIR = Dir.glob("#{Rails.root}/app/bots/**/*_bot.rb")
  Rails.logger.level = Logger::WARN
  if !Rails.env.development?
    SpiderBot::Logging.initialize_logger("#{Rails.root}/log/spider.log")
  end
end
|
30
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module SpiderBot
  # Holds the logger shared by the library.
  module Logging
    # Replace the shared logger with a new Logger at INFO level.
    #
    # The previously installed logger, if any, is closed unless $TESTING is set.
    #
    # @param log_target [IO, String] anything Logger.new accepts
    # @return [Logger] the new logger
    def self.initialize_logger(log_target = STDOUT)
      oldlogger = defined?(@logger) ? @logger : nil
      @logger = Logger.new(log_target)
      @logger.level = Logger::INFO
      oldlogger.close if oldlogger && !$TESTING
      @logger
    end

    # @return [Logger] the shared logger, created on first use with the
    #   defaults of .initialize_logger
    def self.logger
      defined?(@logger) ? @logger : initialize_logger
    end

    # Install a logger; a nil/false value installs one that discards output.
    #
    # @param log [Logger, nil]
    # @return [Logger]
    def self.logger=(log)
      # File::NULL is the platform's null device, so this also works where
      # '/dev/null' does not exist.
      @logger = (log ? log : Logger.new(File::NULL))
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
|
2
|
+
date:
|
3
|
+
time:
|
4
|
+
year: "年|year|years"
|
5
|
+
month: "月|month|months|mon"
|
6
|
+
week: "周|星期|week|weeks"
|
7
|
+
day: "日|天|day|days"
|
8
|
+
hour: "时|小时|時|小時|hour|hours|hr|hrs|h"
|
9
|
+
min: "分|分钟|minute|minutes|min|mins|m"
|
10
|
+
second: "秒|second|seconds|sec|secs|s"
|
11
|
+
month:
|
12
|
+
jan: "january|jan|一月"
|
13
|
+
feb: "february|feb|二月"
|
14
|
+
mar: "march|mar|三月"
|
15
|
+
apr: "april|apr|四月"
|
16
|
+
may: "may|五月"
|
17
|
+
jun: "june|jun|六月"
|
18
|
+
jul: "july|jul|七月"
|
19
|
+
aug: "august|aug|八月"
|
20
|
+
sep: "september|sep|九月"
|
21
|
+
oct: "october|oct|十月"
|
22
|
+
nov: "november|nov|十一月"
|
23
|
+
dec: "december|dec|十二月"
|
24
|
+
other:
|
25
|
+
ago: "ago|前|以前"
|
26
|
+
today: "today|今天"
|
27
|
+
am: "AM|am|上午"
|
28
|
+
pm: "PM|pm|下午"
|
29
|
+
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require "yaml"
|
3
|
+
require "active_support/time"
|
4
|
+
require 'tzinfo'
|
5
|
+
|
6
|
+
DATE_CONFIG = YAML.load_file(File.expand_path("../date.yml", __FILE__))
|
7
|
+
|
8
|
+
class String

  # Parse content to local time
  #
  # Three shapes are recognised, using the words configured in DATE_CONFIG:
  # relative text ("5 minutes ago"), "today HH:MM", and absolute dates.
  # No state is stored on the receiver, so frozen strings can be parsed.
  #
  # NOTE: this assigns Time.zone and does not restore the previous value.
  #
  # @param [String] zone time zone with site ("UTC" when nil)
  # @return [Object] whatever Time.zone's now/ago/parse helpers return
  def parse_time(zone = nil)
    Time.zone = zone.nil? ? "UTC" : zone

    units = DATE_CONFIG["date"]["time"].values.join("|")
    words = DATE_CONFIG["date"]["other"]
    ago_words = words["ago"]
    today_words = words["today"]

    ago_regex = %r"\d+[\s|\S]*(?:#{units})\s*(?:#{ago_words})"
    # The alternatives are grouped so the time part is required for every
    # "today" word; ungrouped, any text containing the first word matched.
    today_regex = %r"(?:#{today_words})\s*\d{1,2}:\d{1,2}"

    case self
    when ago_regex
      parse_date_ago(ago_regex)
    when today_regex
      parse_today
    else
      parse_date
    end
  end

  private

  # Parse content if has keyword mean 'ago'
  #
  # The first number in the matched text is the amount; the unit is the first
  # configured unit pattern (year, month, week, day, hour, min, second) that
  # matches the text.
  def parse_date_ago(ago_regex)
    units = DATE_CONFIG["date"]["time"]
    now = Time.zone.now
    phrase = match(ago_regex)[0]
    amount = phrase[/\d+/].to_i

    case phrase
    when Regexp.new(units["year"])
      now.years_ago(amount)
    when Regexp.new(units["month"])
      now.months_ago(amount)
    when Regexp.new(units["week"])
      now.ago(amount * 60 * 60 * 24 * 7)
    when Regexp.new(units["day"])
      now.ago(amount * 60 * 60 * 24)
    when Regexp.new(units["hour"])
      now.ago(amount * 60 * 60)
    when Regexp.new(units["min"])
      now.ago(amount * 60)
    when Regexp.new(units["second"])
      now.ago(amount)
    else
      raise "get date errors"
    end
  end

  # Parse content if has keyword mean 'today'
  def parse_today
    now = Time.zone.now
    clock = match(/\d{1,2}\s*:\s*\d{1,2}:*\d{0,2}/)[0]
    Time.zone.parse(now.to_date.to_s + " " + clock)
  end

  # Parse an absolute date: year-month-day, month-day-year, or
  # "<month name> <day> <year>"; anything else is handed to Time.zone.parse.
  def parse_date
    words = DATE_CONFIG["date"]["other"]
    am = words["am"]
    pm = words["pm"]

    ymd = %r"(\d{4})[^\d|:]{1,2}(\d{1,2})[^\d|:]{1,2}(\d{1,2})"
    mdy = %r"(\d{1,2})[^\d|:]{1,2}(\d{1,2})[^\d|:]{1,2}(\d{4})"
    named = %r"([\w|\W]+)[^\d|\w]{1,2}(\d{1,2})[^\d|:]*(\d{4})"

    clock = match(%r"\d{1,2}\s*:\d{1,2}\s*:*\d{0,2}(?:#{am}|#{pm})*")
    # Normalise localised AM/PM markers to "am"/"pm".
    clock = clock[0].gsub(Regexp.new(am), "am").gsub(Regexp.new(pm), "pm") if clock

    if (found = match(ymd))
      Time.zone.parse "#{found[1]}-#{found[2]}-#{found[3]} #{clock}"
    elsif (found = match(mdy))
      Time.zone.parse "#{found[3]}-#{found[1]}-#{found[2]} #{clock}"
    elsif (found = match(named))
      month_words = DATE_CONFIG["date"]["month"]
      label = found[1].downcase
      position = %w[jan feb mar apr may jun jul aug sep oct nov dec].index do |key|
        label =~ Regexp.new(month_words[key])
      end
      # An unrecognised month used to produce a malformed "YYYY--D" string.
      return Time.zone.parse(self) unless position

      Time.zone.parse "#{found[3]}-#{position + 1}-#{found[2]} #{clock}"
    else
      Time.zone.parse(self)
    end
  end
end
|
119
|
+
|
data/lib/spider_bot.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "faraday"
|
3
|
+
require 'uri'
|
4
|
+
require "nokogiri"
|
5
|
+
require "multi_json"
|
6
|
+
require "multi_xml"
|
7
|
+
require 'active_support/core_ext/string/conversions'
|
8
|
+
require 'spider_bot/logging'
|
9
|
+
require "spider_bot/version"
|
10
|
+
|
11
|
+
module SpiderBot
  class << self
    # Create a Crawl for the url.
    #
    # Without a block the crawl's #crawl_data is returned; with a block, the
    # block is evaluated in the context of the Crawl instance.
    #
    # @param url [String] passed to Crawl.new
    # @param options [Hash] passed to Crawl.new
    # @return [Object] crawl_data, or the value of the block
    def crawl(url, options = {}, &block)
      crawl_instance = Crawl.new(url, options)
      return crawl_instance.crawl_data if !block_given?
      # Parenthesised to avoid the ambiguous `&` argument-prefix parse.
      crawl_instance.instance_eval(&block)
    end

    # @return [Object] the logger held by SpiderBot::Logging
    def logger
      SpiderBot::Logging.logger
    end

    # @param log [Object] handed to SpiderBot::Logging.logger=
    def logger=(log)
      SpiderBot::Logging.logger = log
    end
  end

  autoload :Crawl, 'spider_bot/crawl'
  autoload :Base, 'spider_bot/base'
  module Http
    autoload :Client, 'spider_bot/http/client'
    autoload :Response, 'spider_bot/http/response'
  end
  # NOTE(review): no spider_bot/engine file is listed for this package version,
  # so referencing SpiderBot::Engine would presumably raise LoadError - confirm.
  autoload :Engine, 'spider_bot/engine'
end
|
36
|
+
|
37
|
+
require 'spider_bot/railte' if defined?(Rails)
|
data/spider_bot.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'spider_bot/version'
|
5
|
+
|
6
|
+
# Gem metadata for spider_bot. The file list comes from `git ls-files`, so the
# gem must be built from a git checkout.
Gem::Specification.new do |spec|
  spec.name          = "spider_bot"
  spec.version       = SpiderBot::VERSION
  spec.authors       = ["yee.li"]
  spec.email         = ["yeeli@outlook.com"]
  # Typo fix: these read "splider bot" although the gem is named spider_bot.
  spec.summary       = %q{spider bot}
  spec.description   = %q{spider bot}
  spec.homepage      = ""
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"

  spec.add_dependency "activesupport"
  spec.add_dependency "faraday"
  spec.add_dependency "nokogiri"
  spec.add_dependency "multi_json"
  spec.add_dependency "multi_xml"
  spec.add_dependency "tzinfo"
  spec.add_dependency "thor"
  spec.add_dependency 'daemons'
end
|