spider2 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README +15 -0
- data/Rakefile +23 -0
- data/init.rb +3 -0
- data/install.rb +2 -0
- data/lib/generators/spider/spider_generator.rb +42 -0
- data/lib/generators/spider/templates/base_page.rb +6 -0
- data/lib/generators/spider/templates/base_page_spec.rb +13 -0
- data/lib/generators/spider/templates/index_page.rb +6 -0
- data/lib/generators/spider/templates/index_page_spec.rb +14 -0
- data/lib/generators/spider/templates/index_page_test.rb +10 -0
- data/lib/generators/spider/templates/list_page.rb +6 -0
- data/lib/generators/spider/templates/list_page_spec.rb +22 -0
- data/lib/generators/spider/templates/list_page_test.rb +10 -0
- data/lib/generators/spider/templates/show_page.rb +14 -0
- data/lib/generators/spider/templates/show_page_spec.rb +19 -0
- data/lib/generators/spider/templates/show_page_test.rb +10 -0
- data/lib/generators/spider/templates/site.rb +7 -0
- data/lib/generators/spider/templates/site_spec.rb +13 -0
- data/lib/generators/spider/templates/test.rb +10 -0
- data/lib/generators/spider_migration/spider_migration_generator.rb +11 -0
- data/lib/generators/spider_migration/templates/migration.rb +42 -0
- data/lib/spider/active_record_methods.rb +60 -0
- data/lib/spider/http.rb +43 -0
- data/lib/spider/page/filter.rb +132 -0
- data/lib/spider/page/label.rb +28 -0
- data/lib/spider/page/pagination.rb +142 -0
- data/lib/spider/page/proxy.rb +149 -0
- data/lib/spider/page/publish.rb +78 -0
- data/lib/spider/page/validation.rb +136 -0
- data/lib/spider/page.rb +759 -0
- data/lib/spider/site.rb +225 -0
- data/lib/spider/spider_page.rb +18 -0
- data/lib/spider/spider_page_label.rb +5 -0
- data/lib/spider/version.rb +3 -0
- data/lib/spider.rb +81 -0
- data/lib/tasks/spider_tasks.rake +86 -0
- data/test/spider_fu_test.rb +9 -0
- data/test/test_helper.rb +4 -0
- data/uninstall.rb +2 -0
- metadata +151 -0
data/lib/spider/site.rb
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
# Base class for site definitions.
#
# Every subclass gets Singleton mixed in by .inherited, receives an id derived
# from its class name, and has its single instance appended to
# Spider::Site.sites. Most class-level helpers simply delegate to that
# instance, so they are only usable on subclasses (the base class itself has
# no +instance+).
class Spider::Site
  # Singleton is not included here; .inherited mixes it into each subclass.
  # include Singleton
  cattr_accessor :sites, :publishers

  # Instances of all subclasses, filled in by .inherited.
  @@sites = []

  # Publishers may be configured from a Rails initializer, for example:
  #   Spider::Site.publishers = [Article]
  # According to the original note, every Page that calls publish then calls
  # Article.receive_spier_page (spelled as in that note; the publishing code
  # is not part of this class) with the page itself as the argument.
  @@publishers = []

  attr_accessor :id, :labels, :blocks, :encoding, :disabled, :name, :index_url, :domains

  class_attribute :attributes
  self.attributes = {}

  def initialize
    # @pages = []
    @domains = []
  end

  # Id of the singleton instance.
  def self.id
    instance.id
  end

  # Deprecated no-op kept for backward compatibility; only logs a warning.
  def self.set_example_url(*args)
    logger.warn "Spider::Site.set_example_url will take no effect, please remove."
  end

  def self.set_index_url(url)
    instance.index_url = url
  end

  def self.set_name(name)
    instance.name = name
  end

  # Replaces the instance's domain list with the given domains.
  def self.set_domains(*domains)
    instance.domains = domains
  end

  # Alias of set_domains.
  def self.set_domain(*args)
    set_domains(*args)
  end

  def self.index_page
    instance.index_page
  end

  # Class-level shortcut for #valid_domain?.
  # The url is forwarded to the instance; the original dropped it, so every
  # call raised ArgumentError.
  def self.valid_domain?(url)
    instance.valid_domain?(url)
  end

  # Appends +domain+ to the domain list (duplicates are removed) and returns
  # the list.
  def add_domain(domain)
    domains.push(domain)
    domains.uniq!
    domains
  end

  # Returns true when the host of +url+ ends with one of the configured
  # domains. Any error while parsing or matching (invalid URI, url without a
  # host, ...) yields false.
  #
  # NOTE(review): this is a plain suffix match, so "notexample.com" matches a
  # configured "example.com" -- confirm whether that is intended.
  def valid_domain?(url)
    host = URI(url).host
    domains.any? { |domain| host.end_with?(domain) }
  rescue StandardError
    # StandardError instead of Exception so signals and exit requests are
    # not swallowed.
    false
  end

  # Page classes that live in the same namespace as this site class: every
  # constant of the parent module whose name contains "Page", except BasePage.
  # The list is recomputed on every call.
  def pages
    namespace = self.class.parent
    page_names = namespace.constants.select { |const| const =~ /Page/ }
    # reject (not reject!): the bang version returns nil when nothing was
    # removed, which made the following collect raise NoMethodError.
    page_names = page_names.reject { |const| const =~ /^BasePage$/ }
    @pages = page_names.collect { |const| "#{namespace}::#{const}".constantize }
  end

  # Memoized instance of the page class whose name ends in "IndexPage",
  # built with index_url. Returns nil when no such class exists.
  def index_page
    return @index_page if @index_page

    page_class = pages.find { |page| page.name =~ /IndexPage$/ }
    @index_page = page_class.new(index_url) if page_class
    @index_page
  end

  def self.pages
    instance.pages
  end

  # Crawl a single url. Per the original note this returns a Spider::Page
  # instance on success and nil otherwise; this base implementation only
  # prints a notice and returns nil.
  #
  # Arguments are accepted (and ignored) because Spider::Site.crawl calls
  # site.crawl(url, options); the original zero-argument definition raised
  # ArgumentError there.
  def crawl(*args)
    puts "#{self.class.name}#crawl not implement"
  end

  # Offers +url+ to each registered site in turn and stops at the first site
  # whose crawl returns a truthy value.
  def self.crawl(url, options = {})
    sites.each do |site|
      break if site.crawl(url, options)
    end
  end

  def self.set_id(id)
    instance.id = id
  end

  class << self
    alias_method :name=, :set_name
    alias_method :id=, :set_id
  end

  # Hook run for every subclass: makes it a singleton, derives its id from
  # the first segment of the tableized class name, and registers its instance.
  def self.inherited(klass)
    super
    klass.send(:include, Singleton)
    klass.instance.id = klass.name.tableize.split("/").first
    @@sites << klass.instance
  end

  # Public instance methods a subclass adds on top of Spider::Site.
  def self.channels
    public_instance_methods - Spider::Site.public_instance_methods
  end

  def channels
    self.class.channels
  end

  # Ids of all registered sites, e.g.
  #   Spider::Site.names #=> ['6park', 'sina', 'wenxuecity']
  def self.names
    sites.map(&:id)
  end

  def logger
    self.class.logger
  end

  def self.logger
    Spider.logger
  end

  # Starts crawling this site.
  #
  # +page_index+ is accepted because Spider::Site.start passes it along; it
  # is not used here. The original zero-argument definition raised
  # ArgumentError when invoked through Spider::Site.start.
  #
  # NOTE(review): the original note on enable? says start should do nothing
  # for a disabled site, but the flag is not checked here -- confirm intent.
  def start(page_index = nil)
    crawl
  end

  # Stored pages belonging to this site.
  # NOTE(review): +options+ is unused, and SpiderPage is referenced without
  # the Spider:: prefix -- confirm that the constant resolves as intended.
  def spider_pages(options = {})
    SpiderPage.scoped(:conditions => { :site => id })
  end

  # True unless the +disabled+ attribute is set.
  def enable?
    !disabled
  end

  # Negation of enable?.
  def disabled?
    !enable?
  end

  # Starts every registered site, or only the site with id +name+ when given.
  #
  # NOTE(review): load_rules is not defined in this class; it must be
  # provided elsewhere or this method raises NameError. Likewise an unknown
  # +name+ makes self[name] return nil -- verify against callers.
  def self.start(name = nil, page_index = nil)
    load_rules
    if name.nil?
      @@sites.each { |site| site.start(page_index) }
    else
      self[name].start(page_index)
    end
  end

  # Finds a registered site by id, e.g.
  #   site = Spider::Site['sina']
  #   site.start if site
  # Returns nil when no site has that id.
  def self.[](id)
    wanted = id.to_s
    sites.find { |site| site.id == wanted }
  end

  def self.all
    sites
  end

  # Names of all registered sites.
  def self.full_page_names
    Spider::Site.all.collect { |site| site.name }.flatten
  end

  # Example urls of all pages of this site, flattened into one array.
  def example_url
    pages.collect { |page| page.example_url }.flatten
  end

  def self.example_url
    instance.example_url
  end

  # Page classes of all registered sites. When options[:attributes] is given
  # (a single name or an array), only pages whose attribute_names include
  # every requested attribute are returned.
  def self.find_pages(options = {})
    all_pages = Spider::Site.all.collect { |site| site.pages }.flatten
    required = [options[:attributes]].flatten.compact
    return all_pages if required.empty?

    all_pages.find_all do |page|
      required.all? { |attribute| page.attribute_names.include?(attribute) }
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
# ActiveRecord model for a stored page; urls must be unique and each page
# owns a set of labels (name/value records).
class Spider::SpiderPage < ActiveRecord::Base
  validates_uniqueness_of :url
  has_many :labels, :class_name => "Spider::SpiderPageLabel", :foreign_key => "page_id"

  # Memoized hash mapping each label's name to its value.
  #
  # Built with Object#tap: the original used Object#returning, which is not
  # available in the Rails version this gem depends on.
  def labels_data
    @labels_data ||= {}.tap do |hash|
      labels.each { |label| hash[label.name] = label.value }
    end
  end

  # True when labels_count is non-zero.
  # to_i makes a nil labels_count count as zero instead of raising
  # NoMethodError.
  def has_label?
    !labels_count.to_i.zero?
  end

  accepts_nested_attributes_for :labels

end
|
data/lib/spider.rb
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#require "hpricot"
|
|
3
|
+
module Spider
  # Shared logger writing to log/spider_<env>.log under the Rails root at
  # DEBUG level. Created on first use and memoized.
  def self.logger
    @logger ||= begin
      log = Logger.new(File.join(Rails.root, "log", "spider_#{Rails.env}.log"))
      log.level = Logger::DEBUG
      log
    end
  end

  # Re-loads every Ruby file below <Rails.root>/spiders.
  #
  # Returns true when all files loaded and false when at least one raised.
  # A failing file does not stop the remaining files from being loaded.
  def self.reload
    succeeded = true
    Dir[Rails.root.join("spiders", "**", "*.rb").to_s].each do |file|
      begin
        load file
      rescue StandardError, ScriptError => e
        # ScriptError covers SyntaxError/LoadError raised by +load+.
        # Exception is deliberately not rescued so interrupts and exit
        # requests still propagate. The failure is logged rather than
        # silently discarded.
        succeeded = false
        logger.error "Spider.reload failed to load #{file}: #{e.class}: #{e.message}"
      end
    end
    succeeded
  end

end
|
|
25
|
+
|
|
26
|
+
module Spider
  # Namespace used by all site definitions.
  module Sites; end
end
|
|
31
|
+
|
|
32
|
+
require "spider/http"
|
|
33
|
+
require "spider/site"
|
|
34
|
+
require "spider/page"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
require "spider/page/filter"
|
|
38
|
+
Spider::Page.send(:include,Spider::Page::Filter)
|
|
39
|
+
|
|
40
|
+
require "spider/page/publish"
|
|
41
|
+
Spider::Page.send(:include,Spider::Page::Publish)
|
|
42
|
+
|
|
43
|
+
require "spider/page/validation"
|
|
44
|
+
Spider::Page.send(:include,Spider::Page::Validation)
|
|
45
|
+
|
|
46
|
+
require "spider/page/pagination"
|
|
47
|
+
Spider::Page.send(:include,Spider::Page::Pagination)
|
|
48
|
+
|
|
49
|
+
require "spider/page/proxy"
|
|
50
|
+
Spider::Page.send(:include,Spider::Page::Proxy)
|
|
51
|
+
|
|
52
|
+
require "spider/page/label"
|
|
53
|
+
Spider::Page.send(:include,Spider::Page::Label)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Make <Rails.root>/spiders requirable.
spiders_dir = File.join(Rails.root, "spiders")
$:.push(spiders_dir)

# Define one top-level namespace module per spider directory.
# Plain files (such as init.rb) are skipped, and a constant that already
# exists is left alone instead of being overwritten by an empty module.
Dir[File.join(spiders_dir, "*")].each do |dir|
  next unless File.directory?(dir)

  module_name = File.basename(dir).classify
  Object.const_set(module_name, Module.new) unless Object.const_defined?(module_name)
end

# Load the optional initialization file first.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
init_file = File.join(spiders_dir, "init.rb")
require init_file if File.exist?(init_file)

# Sorted so the load order does not depend on the platform's directory order.
file_patten = File.join(spiders_dir, "**", "*.rb")
files = Dir[file_patten].sort

# Site definitions first, then base pages, then everything else
# (require skips files that were already loaded).
site_files = files.find_all { |i| i =~ /site\.rb/ }
site_files.each { |i| require i }

base_page_files = files.find_all { |i| i =~ /base_page\.rb/ }
base_page_files.each { |i| require i }

files.each { |i| require i }

# Pull in the ActiveRecord methods.
require "spider/active_record_methods"
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# desc "Explaining what the task does"
|
|
2
|
+
# task :spider_fu do
|
|
3
|
+
# # Task goes here
|
|
4
|
+
# end
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
namespace :spider do

  namespace :proxy do
    desc "test proxy PAGE=XXXX::BasePage"
    task :test => :environment do
      page_name = ENV['PAGE']
      begin
        klass = page_name.constantize
      rescue StandardError
        # Covers both an unset PAGE (nil has no constantize) and an unknown
        # constant name; Exception is not rescued so interrupts still work.
        puts "unknow class `#{page_name}`, please set a right spider page class to PAGE=XXXX::XxxPage"
        exit
      end
      result = klass.validate_proxies
      puts "valid proxies:"
      result[:valid].each do |proxy|
        puts proxy.join(":")
      end
      puts
      puts "invalid proxies:"
      # The original iterated result[:valid] a second time here.
      # NOTE(review): assumes validate_proxies reports rejected proxies under
      # :invalid -- confirm against its implementation. Array() keeps a
      # missing key from raising.
      Array(result[:invalid]).each do |proxy|
        puts proxy.join(":")
      end
    end
  end

  # This description used to sit in front of `namespace :spider`, where the
  # next `desc` replaced it before any task was defined; it is attached to
  # the task it describes instead.
  desc "开始采集"
  task :start => :environment do
    puts "spider:start" => __LINE__
    site = ENV['SITE']
    if site.blank?
      # No SITE given: run the single-site task once per registered site.
      Spider::Site.all.each do |registered_site|
        ENV['SITE'] = registered_site.id
        Rake::Task['spider:site:start'].execute
      end
    else
      Rake::Task['spider:site:start'].execute
    end

  end

  namespace :site do
    desc "开始单个网站"
    task :start => :environment do
      site_id = ENV['SITE']
      publishers = ENV['PUBLISHERS']
      if publishers.blank?
        puts "WARNING: PUBLISHERS is blank."
        next
      else
        Spider::Page.publishers = publishers.split(",")
        puts "Set publishers to #{Spider::Page.publishers.inspect}"
      end
      site = Spider::Site[site_id]
      if site
        puts site.name
        puts "正在运行..."
        site.start
      else
        puts "site(#{ENV['SITE']}) not found"
      end
    end

  end


  # Runs the spider tests; SPIDER=<name> restricts the run to one spider's
  # test directory.
  Rake::TestTask.new(:test => 'db:test:prepare') do |t|
    t.libs << 'test'
    if ENV['SPIDER']
      t.pattern = "test/spiders/#{ENV['SPIDER']}/*_test.rb"
    else
      t.pattern = 'test/spiders/**/*_test.rb'
    end
    t.verbose = true
  end

  Rake::Task['spider:test'].comment = "测试spider规则"

  # Generate tests according to the spider rule names (not implemented here).

end
|
data/test/test_helper.rb
ADDED
data/uninstall.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: spider2
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 29
|
|
5
|
+
prerelease:
|
|
6
|
+
segments:
|
|
7
|
+
- 0
|
|
8
|
+
- 0
|
|
9
|
+
- 1
|
|
10
|
+
version: 0.0.1
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- aotianlong
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2012-03-04 00:00:00 Z
|
|
19
|
+
dependencies:
|
|
20
|
+
- !ruby/object:Gem::Dependency
|
|
21
|
+
name: rails
|
|
22
|
+
prerelease: false
|
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
24
|
+
none: false
|
|
25
|
+
requirements:
|
|
26
|
+
- - ~>
|
|
27
|
+
- !ruby/object:Gem::Version
|
|
28
|
+
hash: 15
|
|
29
|
+
segments:
|
|
30
|
+
- 3
|
|
31
|
+
- 2
|
|
32
|
+
- 0
|
|
33
|
+
version: 3.2.0
|
|
34
|
+
type: :runtime
|
|
35
|
+
version_requirements: *id001
|
|
36
|
+
- !ruby/object:Gem::Dependency
|
|
37
|
+
name: htmlentities
|
|
38
|
+
prerelease: false
|
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
|
40
|
+
none: false
|
|
41
|
+
requirements:
|
|
42
|
+
- - ~>
|
|
43
|
+
- !ruby/object:Gem::Version
|
|
44
|
+
hash: 59
|
|
45
|
+
segments:
|
|
46
|
+
- 4
|
|
47
|
+
- 1
|
|
48
|
+
- 0
|
|
49
|
+
version: 4.1.0
|
|
50
|
+
type: :runtime
|
|
51
|
+
version_requirements: *id002
|
|
52
|
+
- !ruby/object:Gem::Dependency
|
|
53
|
+
name: sqlite3
|
|
54
|
+
prerelease: false
|
|
55
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
|
56
|
+
none: false
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
hash: 3
|
|
61
|
+
segments:
|
|
62
|
+
- 0
|
|
63
|
+
version: "0"
|
|
64
|
+
type: :development
|
|
65
|
+
version_requirements: *id003
|
|
66
|
+
description: a framework to crawl web pages
|
|
67
|
+
email:
|
|
68
|
+
- aotianlong@gmail.com
|
|
69
|
+
executables: []
|
|
70
|
+
|
|
71
|
+
extensions: []
|
|
72
|
+
|
|
73
|
+
extra_rdoc_files: []
|
|
74
|
+
|
|
75
|
+
files:
|
|
76
|
+
- lib/generators/spider/spider_generator.rb
|
|
77
|
+
- lib/generators/spider/templates/base_page.rb
|
|
78
|
+
- lib/generators/spider/templates/base_page_spec.rb
|
|
79
|
+
- lib/generators/spider/templates/index_page.rb
|
|
80
|
+
- lib/generators/spider/templates/index_page_spec.rb
|
|
81
|
+
- lib/generators/spider/templates/index_page_test.rb
|
|
82
|
+
- lib/generators/spider/templates/list_page.rb
|
|
83
|
+
- lib/generators/spider/templates/list_page_spec.rb
|
|
84
|
+
- lib/generators/spider/templates/list_page_test.rb
|
|
85
|
+
- lib/generators/spider/templates/show_page.rb
|
|
86
|
+
- lib/generators/spider/templates/show_page_spec.rb
|
|
87
|
+
- lib/generators/spider/templates/show_page_test.rb
|
|
88
|
+
- lib/generators/spider/templates/site.rb
|
|
89
|
+
- lib/generators/spider/templates/site_spec.rb
|
|
90
|
+
- lib/generators/spider/templates/test.rb
|
|
91
|
+
- lib/generators/spider_migration/spider_migration_generator.rb
|
|
92
|
+
- lib/generators/spider_migration/templates/migration.rb
|
|
93
|
+
- lib/spider/active_record_methods.rb
|
|
94
|
+
- lib/spider/http.rb
|
|
95
|
+
- lib/spider/page/filter.rb
|
|
96
|
+
- lib/spider/page/label.rb
|
|
97
|
+
- lib/spider/page/pagination.rb
|
|
98
|
+
- lib/spider/page/proxy.rb
|
|
99
|
+
- lib/spider/page/publish.rb
|
|
100
|
+
- lib/spider/page/validation.rb
|
|
101
|
+
- lib/spider/page.rb
|
|
102
|
+
- lib/spider/site.rb
|
|
103
|
+
- lib/spider/spider_page.rb
|
|
104
|
+
- lib/spider/spider_page_label.rb
|
|
105
|
+
- lib/spider/version.rb
|
|
106
|
+
- lib/spider.rb
|
|
107
|
+
- lib/tasks/spider_tasks.rake
|
|
108
|
+
- MIT-LICENSE
|
|
109
|
+
- Rakefile
|
|
110
|
+
- init.rb
|
|
111
|
+
- install.rb
|
|
112
|
+
- README
|
|
113
|
+
- uninstall.rb
|
|
114
|
+
- test/spider_fu_test.rb
|
|
115
|
+
- test/test_helper.rb
|
|
116
|
+
homepage: http://www.powerapple.com
|
|
117
|
+
licenses: []
|
|
118
|
+
|
|
119
|
+
post_install_message:
|
|
120
|
+
rdoc_options: []
|
|
121
|
+
|
|
122
|
+
require_paths:
|
|
123
|
+
- lib
|
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
125
|
+
none: false
|
|
126
|
+
requirements:
|
|
127
|
+
- - ">="
|
|
128
|
+
- !ruby/object:Gem::Version
|
|
129
|
+
hash: 3
|
|
130
|
+
segments:
|
|
131
|
+
- 0
|
|
132
|
+
version: "0"
|
|
133
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
134
|
+
none: false
|
|
135
|
+
requirements:
|
|
136
|
+
- - ">="
|
|
137
|
+
- !ruby/object:Gem::Version
|
|
138
|
+
hash: 3
|
|
139
|
+
segments:
|
|
140
|
+
- 0
|
|
141
|
+
version: "0"
|
|
142
|
+
requirements: []
|
|
143
|
+
|
|
144
|
+
rubyforge_project:
|
|
145
|
+
rubygems_version: 1.8.15
|
|
146
|
+
signing_key:
|
|
147
|
+
specification_version: 3
|
|
148
|
+
summary: spider
|
|
149
|
+
test_files:
|
|
150
|
+
- test/spider_fu_test.rb
|
|
151
|
+
- test/test_helper.rb
|