spider_bot 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.yardopts +2 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +148 -0
- data/Rakefile +2 -0
- data/bin/spider +12 -0
- data/lib/spider_bot/base.rb +31 -0
- data/lib/spider_bot/cli.rb +183 -0
- data/lib/spider_bot/crawl.rb +235 -0
- data/lib/spider_bot/error.rb +5 -0
- data/lib/spider_bot/http/client.rb +166 -0
- data/lib/spider_bot/http/response.rb +83 -0
- data/lib/spider_bot/load.rb +30 -0
- data/lib/spider_bot/logging.rb +21 -0
- data/lib/spider_bot/railte.rb +6 -0
- data/lib/spider_bot/string/date.yml +29 -0
- data/lib/spider_bot/string/time.rb +119 -0
- data/lib/spider_bot/version.rb +3 -0
- data/lib/spider_bot.rb +37 -0
- data/spider_bot.gemspec +32 -0
- metadata +206 -0
@@ -0,0 +1,166 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
|
3
|
+
module SpiderBot
  module Http
    # Small HTTP client that wraps a lazily built Faraday connection.
    class Client
      # Maximum number of consecutive redirects #request will follow before
      # handing the redirect response back to the caller.
      MAX_REDIRECTS = 10

      # @return [String, nil] base url the connection is built with
      attr_reader :url

      # @return [String] value sent as the User-Agent header
      attr_reader :user_agent

      # @return [Hash, nil] headers assigned through #headers=
      attr_reader :headers

      # @return [Hash, nil] options passed to the constructor (only stored here)
      attr_accessor :options

      # Lets callers inject a ready-made connection; the reader is defined below.
      attr_writer :connection

      # @return [Proc, nil] block registered through #builder
      attr_accessor :conn_build

      # Supported User-Agent names (keys are Strings)
      #
      # * bot (default)
      # * Linux Firefox (3.6.1)
      # * Linux Mozilla
      # * Linux Chrome
      # * Mac Firefox
      # * Mac Safari
      # * Mac Chrome
      # * Windows IE 6
      # * Windows IE 7
      # * Windows IE 8
      # * Windows IE 9
      # * Windows Mozilla
      # * iPhone (3.0)
      # * iPad
      # * Android
      USER_AGENT = {
        'bot' => "bot/#{SpiderBot::VERSION}",
        'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
        'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
        'Linux Chrome' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624 Chrome/26.0.1410.43',
        'Mac Firefox' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) Gecko/20100101 Firefox/35.0',
        'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/600.3.18 (KHTML, like Gecko) Version/8.0.3 Safari/600.3.18',
        'Mac Chrome' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36',
        'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
        'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
        'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
        'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
        'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
        'iPad' => 'Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
        'Android' => 'Mozilla/5.0 (Linux; U; Android 3.0; en-us) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
      }.freeze

      # Initialize a new Client
      #
      # @param uri [String, nil] base url for the connection
      # @param options [Hash, nil] stored on the instance; not interpreted here
      # @yield [client] the new instance, for block-style configuration
      #
      # @example
      #   http = SpiderBot::Http::Client.new
      #
      #   http = SpiderBot::Http::Client.new do |http|
      #     http.user_agent = "Mac Safari"
      #     http.url = "http://example.com"
      #   end
      def initialize(uri = nil, options = nil, &block)
        @url = uri
        @options = options
        @user_agent = USER_AGENT['bot']
        yield self if block_given?
      end

      # Register a block that configures the Faraday middleware stack.
      # It is invoked once, when the connection is first built.
      #
      # @return [Proc] the stored block
      def builder(&block)
        @conn_build = block
      end

      # Set the base url and drop the memoized connection so the next call
      # to #connection is built against the new url.
      #
      # @param uri [String] the new base url
      def url=(uri)
        # The memoized connection is stored in @connection (see #connection).
        @connection = nil
        @url = uri
      end

      # Set the request headers. The current user agent is merged in and
      # overrides any "User-Agent" key supplied by the caller.
      #
      # @param headers [Hash] header name => value
      # @return [Hash] the stored headers
      def headers=(headers)
        @headers = headers.merge({"User-Agent" => user_agent})
      end

      # Select the user agent by name.
      #
      # @param name [String] a key of USER_AGENT; unknown names fall back to 'bot'
      def user_agent=(name)
        @user_agent = USER_AGENT[name] || USER_AGENT['bot']
      end

      # The Faraday connection, built on first use.
      #
      # @return [Faraday::Connection]
      def connection
        @connection ||= begin
          conn = Faraday.new(url: url)
          conn.build { |b| conn_build.call(b) } if conn_build
          conn
        end
      end

      # Make a request and follow 301/302/303/307 redirects.
      #
      # @param verb [Symbol] one of :get, :post, :put, :delete
      # @param uri [String] URL path for the request
      # @param query [Hash] URL parameters for :get, request body otherwise
      # @param redirects_left [Integer] remaining redirects to follow; when it
      #   reaches zero the redirect response itself is returned
      # @yield [request] the Faraday request, before it is sent
      # @return [SpiderBot::Http::Response, nil] the response for 2xx/3xx
      #   statuses, nil for any other status
      def request(verb, uri, query = {}, redirects_left = MAX_REDIRECTS, &block)
        query_get, query_post = verb == :get ? [query, nil] : [nil, query]
        target = connection.build_url(uri, query_get)
        # Always send the configured user agent, even if #headers= was never called.
        request_headers = (headers || {}).merge('User-Agent' => user_agent)

        raw = connection.run_request(verb, target, query_post, request_headers) do |req|
          block.call(req) if block
        end
        response = Response.new(raw)

        case response.status
        when 301, 302, 303, 307
          # Bounded to avoid unbounded recursion on redirect loops.
          return response if redirects_left <= 0
          request(verb, response.headers['location'], query, redirects_left - 1, &block)
        when 200..399
          response
        end
      end

      # Send a GET request.
      #
      # @param uri [String] URL path for the request
      # @param query [Hash] additional query parameters for the URL
      # @return [SpiderBot::Http::Response, nil] see #request
      def get(uri, query = {}, &block)
        request(:get, uri, query, &block)
      end

      # Send a POST request.
      #
      # @param uri [String] URL path for the request
      # @param query [Hash] parameters sent as the request body
      # @return [SpiderBot::Http::Response, nil] see #request
      def post(uri, query = {}, &block)
        request(:post, uri, query, &block)
      end
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module SpiderBot
  module Http
    # Wraps a raw HTTP response (anything responding to #status, #headers
    # and #body) and adds charset decoding and body parsing.
    class Response
      # @return [Object] the wrapped raw response
      attr_reader :response

      # Maps a media type to the parser used for it.
      CONTENT_TYPE = {
        'application/json' => :json,
        'application/x-www-form-urlencoded' => :html,
        'text/html' => :html,
        'text/javascript' => :json,
        'text/xml' => :xml
      }.freeze

      # Parser callables keyed by the symbols used in CONTENT_TYPE.
      PARSERS = {
        :json => lambda { |body|
          begin
            MultiJson.respond_to?(:adapter) ? MultiJson.load(body) : MultiJson.decode(body)
          rescue StandardError
            # Best effort: hand back the raw body when it is not valid JSON.
            body
          end
        },
        :html => lambda { |body| Nokogiri::HTML(body) },
        :xml => lambda { |body| MultiXml.parse(body) }
      }.freeze

      # Finds a `charset=...` declaration inside the body text.
      CHARSET_PATTERN = /charset\s*=[\s|\W]*([\w-]+)/

      # @param response [Object] raw response to wrap
      def initialize(response)
        @response = response
      end

      # @return [Hash] response headers
      def headers
        response.headers
      end

      # The response body, transcoded to UTF-8 when a charset can be determined.
      #
      # @param options [Hash, nil]
      # @option options [String] :encode source encoding to assume for the body
      # @return [String]
      def body(options = {})
        decode(response.body, options || {})
      end

      # Transcode +body+ to UTF-8.
      #
      # The source encoding is options[:encode] when given, otherwise the
      # charset declared inside the body. The body is returned untouched when
      # it is treated as JSON, already declares utf-8, declares no charset, or
      # cannot be converted. The input string is never mutated, so repeated
      # calls give the same result.
      #
      # @param body [String, nil]
      # @param options [Hash, nil] see #body
      # @return [String] '' when body is nil
      def decode(body, options = {})
        return '' unless body
        return body if json?

        options ||= {}
        declared = body[CHARSET_PATTERN, 1]
        return body if declared && declared.downcase == "utf-8"

        source = options[:encode] || (declared && charset_covert(declared))
        return body unless source

        begin
          # Options must be keywords: a positional hash raises on Ruby 3.
          body.encode("utf-8", source, invalid: :replace)
        rescue StandardError
          body
        end
      end

      # @return [Integer] HTTP status code
      def status
        response.status
      end

      # Attempts to determine the content type of the response.
      #
      # @return [String] media type without parameters, '' when absent
      def content_type
        raw = response.headers.values_at('content-type', 'Content-Type').compact.first || ''
        (raw.split(';').first || '').strip
      end

      # @return [Boolean] true when the content type maps to :json or the
      #   body contains no <html tag (matched case-insensitively)
      def json?
        CONTENT_TYPE[content_type] == :json || response.body.to_s !~ /<html/i
      end

      # Parser key for this response; unknown content types default to :html.
      #
      # NOTE(review): the original also tried to switch :html to :json when the
      # body had no html tag, but its regex ended in an empty alternative and
      # always matched, so that step never changed the result. The effective
      # behaviour is preserved here.
      #
      # @return [Symbol] one of the PARSERS keys
      def parser
        CONTENT_TYPE[content_type] || :html
      end

      # @return [Object] the body run through the matching parser (memoized)
      def parsed
        @parsed ||= PARSERS[parser].call(body)
      end

      # Normalize a declared charset name for transcoding: gb2312 and gbk
      # (any letter case) both map to "gbk"; other names pass through.
      #
      # @param charset [String]
      # @return [String]
      def charset_covert(charset)
        %w[gb2312 gbk].include?(charset.to_s.downcase) ? "gbk" : charset
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Boot the host application: try ./config/application first and fall back to
# ./config/boot.rb when that cannot be loaded.
begin
  require File.expand_path("./config/application")
rescue LoadError
  boot_file = File.expand_path("./config/boot.rb")
  require boot_file if File.exist?(boot_file)
end

# Padrino host: collect the bot files and log to a file outside development.
if defined?(Padrino)
  puts "read padrino environment #{Padrino.env}"
  BOTDIR = Dir.glob("#{Padrino.root}/app/bots/**/*_bot.rb")
  unless Padrino.env == :development
    SpiderBot::Logging.initialize_logger("#{Padrino.root}/log/spider.log")
  end
end

# Rails host: turn eager loading off, initialize the app, collect the bot
# files, quiet the Rails logger and log to a file outside development.
if defined?(Rails)
  class Railtie < Rails::Railtie
    initializer "disable eager load" do |app|
      app.config.eager_load = false
    end
  end

  Rails.application.initialize!
  puts "read rails environment #{Rails.env}"
  BOTDIR = Dir.glob("#{Rails.root}/app/bots/**/*_bot.rb")
  Rails.logger.level = Logger::WARN
  unless Rails.env.development?
    SpiderBot::Logging.initialize_logger("#{Rails.root}/log/spider.log")
  end
end
|
30
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module SpiderBot
  # Holds the process-wide logger used by SpiderBot.
  module Logging
    # Replace the current logger with a new one at INFO level.
    # The previous logger, if any, is closed unless $TESTING is set.
    #
    # @param log_target [IO, String] anything Logger.new accepts
    # @return [Logger] the new logger
    def self.initialize_logger(log_target = STDOUT)
      previous = defined?(@logger) ? @logger : nil
      @logger = Logger.new(log_target)
      @logger.level = Logger::INFO
      previous.close if previous && !$TESTING
      @logger
    end

    # @return [Logger] the current logger, created on first use
    def self.logger
      defined?(@logger) ? @logger : initialize_logger
    end

    # Assign the logger. A nil/false value installs a logger that writes to
    # the platform's null device.
    #
    # @param log [Logger, nil]
    def self.logger=(log)
      # File::NULL instead of a literal '/dev/null' so this also works on Windows.
      @logger = log || Logger.new(File::NULL)
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
|
2
|
+
# Keyword tables read by String#parse_time. Each value is a "|"-separated
# list of alternatives that is interpolated into a regular expression.
date:
  # Time-unit words used in relative phrases such as "3 days ago".
  time:
    year: "年|year|years"
    month: "月|month|months|mon"
    week: "周|星期|week|weeks"
    day: "日|天|day|days"
    hour: "时|小时|時|小時|hour|hours|hr|hrs|h"
    min: "分|分钟|minute|minutes|min|mins|m"
    second: "秒|second|seconds|sec|secs|s"
  # Month names, matched against lower-cased text.
  month:
    jan: "january|jan|一月"
    feb: "february|feb|二月"
    mar: "march|mar|三月"
    apr: "april|apr|四月"
    may: "may|五月"
    jun: "june|jun|六月"
    jul: "july|jul|七月"
    aug: "august|aug|八月"
    sep: "september|sep|九月"
    oct: "october|oct|十月"
    nov: "november|nov|十一月"
    dec: "december|dec|十二月"
  # Miscellaneous markers.
  other:
    ago: "ago|前|以前"
    today: "today|今天"
    am: "AM|am|上午"
    pm: "PM|pm|下午"
|
29
|
+
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require "yaml"
|
3
|
+
require "active_support/time"
|
4
|
+
require 'tzinfo'
|
5
|
+
|
6
|
+
DATE_CONFIG = YAML.load_file(File.expand_path("../date.yml", __FILE__))
|
7
|
+
|
8
|
+
class String
  # Parse the string as a point in time.
  #
  # Recognizes relative phrases ("3 days ago"), "today HH:MM" phrases and a
  # few absolute date layouts; anything else is passed to Time.zone.parse.
  # The requested zone is applied only for the duration of the call, and no
  # state is stored on the receiver, so frozen strings work too.
  #
  # @param zone [String, nil] time zone used for parsing; "UTC" when nil
  # @return [Object] whatever the Time.zone helpers return for the input
  def parse_time(zone = nil)
    Time.use_zone(zone.nil? ? "UTC" : zone) do
      today_words = spider_date_config("other")["today"]
      # Alternatives are grouped so a clock time is required after the keyword.
      today_regex = %r"(?:#{today_words})\s*\d{1,2}:\d{1,2}"

      case self
      when spider_ago_regex
        parse_date_ago
      when today_regex
        parse_today
      else
        parse_date
      end
    end
  end

  private

  # One section ("time", "month" or "other") of the DATE_CONFIG keyword tables.
  def spider_date_config(section)
    DATE_CONFIG["date"][section]
  end

  # Regex for "<number> ... <time unit> <ago word>" phrases.
  def spider_ago_regex
    units = spider_date_config("time").values.join("|")
    ago_words = spider_date_config("other")["ago"]
    %r"\d+[\s|\S]*(?:#{units})\s*(?:#{ago_words})"
  end

  # Month number (1-12) for the first month keyword found in +text+, or nil.
  def spider_month_number(text)
    names = spider_date_config("month")
    lowered = text.downcase
    position = %w[jan feb mar apr may jun jul aug sep oct nov dec].index do |key|
      lowered =~ %r"#{names[key]}"
    end
    position && position + 1
  end

  # Parse content that contains a keyword meaning 'ago'.
  # The first number in the matched phrase is the amount; the first unit
  # table (year, month, week, day, hour, min, second) that matches decides
  # how far to go back from the current time.
  def parse_date_ago
    now = Time.zone.now
    phrase = self[spider_ago_regex]
    amount = phrase[/\d+/].to_i
    units = spider_date_config("time")

    case phrase
    when %r"#{units["year"]}"
      now.years_ago(amount)
    when %r"#{units["month"]}"
      now.months_ago(amount)
    when %r"#{units["week"]}"
      now.ago(amount * 60 * 60 * 24 * 7)
    when %r"#{units["day"]}"
      now.ago(amount * 60 * 60 * 24)
    when %r"#{units["hour"]}"
      now.ago(amount * 60 * 60)
    when %r"#{units["min"]}"
      now.ago(amount * 60)
    when %r"#{units["second"]}"
      now.ago(amount)
    else
      raise "get date errors"
    end
  end

  # Parse content that contains a keyword meaning 'today': combine today's
  # date in the current zone with the clock time found in the string.
  def parse_today
    clock = self[/\d{1,2}\s*:\s*\d{1,2}:*\d{0,2}/]
    Time.zone.parse("#{Time.zone.now.to_date} #{clock}")
  end

  # Parse absolute dates. Layouts tried in order:
  #   1. year, month, day           (e.g. 2015-03-05)
  #   2. two numbers then the year  (read as month, day, year)
  #   3. month name, day, year      (e.g. March 5, 2015)
  # An optional clock time, with an optional AM/PM marker, is appended.
  # Anything else, including an unrecognized month name, is passed to
  # Time.zone.parse unchanged.
  def parse_date
    other = spider_date_config("other")
    am_words = other["am"]
    pm_words = other["pm"]

    # Allow whitespace before the AM/PM marker so "10:30 PM" keeps its marker.
    clock = self[%r"\d{1,2}\s*:\d{1,2}\s*:*\d{0,2}(?:\s*(?:#{am_words}|#{pm_words}))?"]
    clock = clock.gsub(%r"#{am_words}", "am").gsub(%r"#{pm_words}", "pm") if clock

    year_first = %r"(\d{4})[^\d|:]{1,2}(\d{1,2})[^\d|:]{1,2}(\d{1,2})"
    year_last = %r"(\d{1,2})[^\d|:]{1,2}(\d{1,2})[^\d|:]{1,2}(\d{4})"
    month_name = %r"([\w|\W]+)[^\d|\w]{1,2}(\d{1,2})[^\d|:]*(\d{4})"

    if (found = match(year_first))
      Time.zone.parse("#{found[1]}-#{found[2]}-#{found[3]} #{clock}")
    elsif (found = match(year_last))
      Time.zone.parse("#{found[3]}-#{found[1]}-#{found[2]} #{clock}")
    elsif (found = match(month_name)) && (month = spider_month_number(found[1]))
      Time.zone.parse("#{found[3]}-#{month}-#{found[2]} #{clock}")
    else
      Time.zone.parse(self)
    end
  end
end
|
119
|
+
|
data/lib/spider_bot.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "faraday"
|
3
|
+
require 'uri'
|
4
|
+
require "nokogiri"
|
5
|
+
require "multi_json"
|
6
|
+
require "multi_xml"
|
7
|
+
require 'active_support/core_ext/string/conversions'
|
8
|
+
require 'spider_bot/logging'
|
9
|
+
require "spider_bot/version"
|
10
|
+
|
11
|
+
module SpiderBot
  # Components are loaded lazily on first reference.
  autoload :Crawl, 'spider_bot/crawl'
  autoload :Base, 'spider_bot/base'
  autoload :Engine, 'spider_bot/engine'

  module Http
    autoload :Client, 'spider_bot/http/client'
    autoload :Response, 'spider_bot/http/response'
  end

  # Build a Crawl for +url+. Without a block the crawl data is returned;
  # with a block, the block is evaluated in the context of the Crawl instance.
  #
  # @param url [String]
  # @param options [Hash] passed through to Crawl.new
  def self.crawl(url, options = {}, &block)
    crawler = Crawl.new(url, options)
    if block_given?
      crawler.instance_eval(&block)
    else
      crawler.crawl_data
    end
  end

  # @return [Logger] the logger held by SpiderBot::Logging
  def self.logger
    SpiderBot::Logging.logger
  end

  # @param log [Logger, nil] forwarded to SpiderBot::Logging.logger=
  def self.logger=(log)
    SpiderBot::Logging.logger = log
  end
end
|
36
|
+
|
37
|
+
require 'spider_bot/railte' if defined?(Rails)
|
data/spider_bot.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'spider_bot/version'
|
5
|
+
|
6
|
+
# Gem specification for spider_bot.
Gem::Specification.new do |spec|
  spec.name          = "spider_bot"
  spec.version       = SpiderBot::VERSION
  spec.authors       = ["yee.li"]
  spec.email         = ["yeeli@outlook.com"]
  # "splider" in the published summary/description was a typo.
  spec.summary       = %q{spider bot}
  spec.description   = %q{spider bot}
  # NOTE(review): homepage is empty; fill in the project URL if there is one.
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git; executables come from bin/.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"

  spec.add_dependency "activesupport"
  spec.add_dependency "faraday"
  spec.add_dependency "nokogiri"
  spec.add_dependency "multi_json"
  spec.add_dependency "multi_xml"
  spec.add_dependency "tzinfo"
  spec.add_dependency "thor"
  spec.add_dependency 'daemons'
end
|