apollo-crawler 0.0.40 → 0.0.41
Sign up to get free protection for your applications and to get access to all the features.
data/bin/apollo-crawler
CHANGED
@@ -25,6 +25,7 @@ require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'versio
|
|
25
25
|
module Crawler
|
26
26
|
class Program
|
27
27
|
@@PLUGIN_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "plugins")
|
28
|
+
@@FORMATTERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "formatters")
|
28
29
|
@@PLUGIN_TEMPLATE_NAME = "plugin_template.rb"
|
29
30
|
|
30
31
|
# This hash will hold all of the options
|
@@ -33,10 +34,12 @@ module Crawler
|
|
33
34
|
@options = nil
|
34
35
|
@optparser = nil
|
35
36
|
@plugins = nil
|
37
|
+
@formatters = nil
|
36
38
|
|
37
39
|
# Initializer - Constructor
|
38
40
|
def initialize
|
39
41
|
@plugins = {}
|
42
|
+
@formatters = {}
|
40
43
|
end
|
41
44
|
|
42
45
|
# Initialize command-line options
|
@@ -47,6 +50,9 @@ module Crawler
|
|
47
50
|
@options[:plugin_dirs] = [
|
48
51
|
@@PLUGIN_DIR
|
49
52
|
]
|
53
|
+
@options[:formatter_dirs] = [
|
54
|
+
@@FORMATTERS_DIR
|
55
|
+
]
|
50
56
|
@options[:generate_plugin] = nil
|
51
57
|
|
52
58
|
@optparser = OptionParser.new do | opts |
|
@@ -115,15 +121,48 @@ module Crawler
|
|
115
121
|
end
|
116
122
|
end
|
117
123
|
|
124
|
+
# Register formatters
|
125
|
+
# Loads every "*.rb" file found recursively under +dir+ and stores each class
# defined in Apollo::Crawler::Formatters in @formatters, keyed by the
# lower-cased constant name (for example "json").
#
# dir - directory searched for formatter source files.
#
# When @options[:verbose] is set, progress is printed to stdout.
def register_formatters(dir)
  verbose = @options[:verbose]
  puts "Registering formatters - '#{dir}'" if verbose

  # Expand to absolute paths so +require+ does not depend on $LOAD_PATH or the
  # current working directory; sort for a deterministic load order.
  Dir.glob(File.join(dir, "**", "*.rb")).sort.each do |file|
    require File.expand_path(file)
  end

  # Only constants that are classes are formatters; other constants are skipped.
  namespace = Apollo::Crawler::Formatters
  namespace.constants.each do |const|
    klass = namespace.const_get(const)
    next unless klass.is_a?(Class)

    @formatters[const.to_s.downcase] = klass
  end

  return unless verbose

  @formatters.each do |formatter, klass|
    # Read the name from the class itself; instantiating the formatter just to
    # get its class name would needlessly run (and could fail in) its constructor.
    name = klass.name

    # Skip the Formatter base class in the listing.
    next if name == "Apollo::Crawler::Formatters::Formatter"

    puts "Registered formatter '#{formatter}' -> '#{name}'"
  end
end
|
156
|
+
|
118
157
|
# Register plugins (specific crawlers)
|
119
158
|
def register_plugins(dir)
|
120
159
|
if(@options[:verbose])
|
121
160
|
puts "Registering plugins - '#{dir}'"
|
122
161
|
end
|
123
162
|
|
124
|
-
|
125
|
-
Dir.glob(
|
126
|
-
require
|
163
|
+
files = File.join(dir, "**", "*.rb")
|
164
|
+
Dir.glob(files).each do |file|
|
165
|
+
require file
|
127
166
|
end
|
128
167
|
|
129
168
|
tmp = Apollo::Crawler::Plugins.constants.select { |c|
|
@@ -143,7 +182,7 @@ module Crawler
|
|
143
182
|
next
|
144
183
|
end
|
145
184
|
|
146
|
-
puts "Registered '#{plugin}' -> '#{name}'"
|
185
|
+
puts "Registered plugin '#{plugin}' -> '#{name}'"
|
147
186
|
end
|
148
187
|
end
|
149
188
|
end
|
@@ -219,6 +258,11 @@ module Crawler
|
|
219
258
|
register_plugins(dir)
|
220
259
|
end
|
221
260
|
|
261
|
+
# Register formatters found in the configured formatter directories
|
262
|
+
@options[:formatter_dirs].each do |dir|
|
263
|
+
register_formatters(dir)
|
264
|
+
end
|
265
|
+
|
222
266
|
if(@options[:list_plugins])
|
223
267
|
headings = ['name', 'class']
|
224
268
|
rows = @plugins
|
@@ -257,7 +301,8 @@ module Crawler
|
|
257
301
|
next
|
258
302
|
end
|
259
303
|
|
260
|
-
puts
|
304
|
+
puts Apollo::Crawler::Formatters::Json.format(res)
|
305
|
+
# puts Apollo::Crawler::Formatters::Plain.format(res)
|
261
306
|
end
|
262
307
|
end
|
263
308
|
end
|
data/lib/apollo_crawler.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
# require 'apollo_crawler/plugin'
|
2
2
|
|
3
3
|
require 'apollo_crawler/crawler'
|
4
|
+
require 'apollo_crawler/formatter'
|
4
5
|
require 'apollo_crawler/plugin'
|
5
6
|
|
7
|
+
# Formatters
|
8
|
+
require 'apollo_crawler/formatters/formatter_plain'
|
9
|
+
|
10
|
+
# Plugins
|
6
11
|
require 'apollo_crawler/plugins/alexa_com/alexa'
|
7
12
|
require 'apollo_crawler/plugins/firmy_cz/firmy'
|
8
13
|
require 'apollo_crawler/plugins/slashdot_org/slashdot'
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'formatter')
|
4
|
+
|
5
|
+
module Apollo
  module Crawler
    module Formatters
      # Formatter that renders an object as pretty-printed JSON.
      class Json < Formatter
        # Instance-level variant of Json.format.
        #
        # Delegates to this class's own class method so that the instance and
        # class APIs produce the same JSON output.
        def format(obj)
          return Json.format(obj)
        end

        # Returns +obj+ serialized with JSON.pretty_generate.
        def self.format(obj)
          return JSON.pretty_generate(obj)
        end
      end
    end # Formatters
  end # Crawler
end # Apollo
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'awesome_print'
|
2
|
+
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'formatter')
|
4
|
+
|
5
|
+
module Apollo
  module Crawler
    module Formatters
      # Formatter that renders an object using its +inspect+ representation.
      class Plain < Formatter
        class << self
          # Returns the result of calling +inspect+ on +obj+.
          def format(obj)
            obj.inspect
          end
        end

        # Instance-level variant; forwards to Plain.format.
        def format(obj)
          Plain.format(obj)
        end
      end
    end # Formatters
  end # Crawler
end # Apollo
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apollo-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.40
|
4
|
+
version: 0.0.41
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -27,6 +27,22 @@ dependencies:
|
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: awesome_print
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
30
46
|
- !ruby/object:Gem::Dependency
|
31
47
|
name: active_support
|
32
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -210,8 +226,11 @@ executables:
|
|
210
226
|
extensions: []
|
211
227
|
extra_rdoc_files: []
|
212
228
|
files:
|
229
|
+
- ./lib/apollo_crawler/formatters/formatter_plain.rb
|
230
|
+
- ./lib/apollo_crawler/formatters/formatter_json.rb
|
213
231
|
- ./lib/apollo_crawler/version.rb
|
214
232
|
- ./lib/apollo_crawler/crawler.rb
|
233
|
+
- ./lib/apollo_crawler/formatter.rb
|
215
234
|
- ./lib/apollo_crawler/plugin_template.rb
|
216
235
|
- ./lib/apollo_crawler/plugins/xkcd_com/xkcd.rb
|
217
236
|
- ./lib/apollo_crawler/plugins/slashdot_org/slashdot.rb
|