apollo-crawler 0.0.40 → 0.0.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/apollo-crawler
CHANGED
@@ -25,6 +25,7 @@ require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'versio
|
|
25
25
|
module Crawler
|
26
26
|
class Program
|
27
27
|
@@PLUGIN_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "plugins")
|
28
|
+
@@FORMATTERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "formatters")
|
28
29
|
@@PLUGIN_TEMPLATE_NAME = "plugin_template.rb"
|
29
30
|
|
30
31
|
# This hash will hold all of the options
|
@@ -33,10 +34,12 @@ module Crawler
|
|
33
34
|
@options = nil
|
34
35
|
@optparser = nil
|
35
36
|
@plugins = nil
|
37
|
+
@formatters = nil
|
36
38
|
|
37
39
|
# Initializer - Constructor
|
38
40
|
def initialize
  # Start with empty plugin and formatter lookup tables.
  @formatters = Hash.new
  @plugins = Hash.new
end
|
41
44
|
|
42
45
|
# Initialize command-line options
|
@@ -47,6 +50,9 @@ module Crawler
|
|
47
50
|
@options[:plugin_dirs] = [
|
48
51
|
@@PLUGIN_DIR
|
49
52
|
]
|
53
|
+
@options[:formatter_dirs] = [
|
54
|
+
@@FORMATTERS_DIR
|
55
|
+
]
|
50
56
|
@options[:generate_plugin] = nil
|
51
57
|
|
52
58
|
@optparser = OptionParser.new do | opts |
|
@@ -115,15 +121,48 @@ module Crawler
|
|
115
121
|
end
|
116
122
|
end
|
117
123
|
|
124
|
+
# Register formatters
|
125
|
+
# Load and register every formatter found under +dir+.
#
# Requires each "*.rb" file below +dir+ (recursively), then stores every class
# defined directly in Apollo::Crawler::Formatters in @formatters, keyed by the
# lower-cased constant name (e.g. "json" => Apollo::Crawler::Formatters::Json).
#
# dir - directory to scan for formatter source files.
#
# When @options[:verbose] is set, prints the directory being scanned and one
# line per registered formatter (the base Formatter class is not listed).
def register_formatters(dir)
  if @options[:verbose]
    puts "Registering formatters - '#{dir}'"
  end

  # Sort so the load order does not depend on the filesystem's listing order.
  Dir.glob(File.join(dir, "**", "*.rb")).sort.each do |file|
    require file
  end

  namespace = Apollo::Crawler::Formatters
  namespace.constants.each do |const|
    klass = namespace.const_get(const)
    # Only classes are formatters; skip nested modules and plain constants.
    next unless klass.is_a?(Class)

    @formatters[const.to_s.downcase] = klass
  end

  return unless @options[:verbose]

  @formatters.each do |formatter, klass|
    # Ask the class for its name directly; instantiating it just to read the
    # name would fail for any formatter whose constructor needs arguments.
    name = klass.name
    next if name == "Apollo::Crawler::Formatters::Formatter"

    puts "Registered formatter '#{formatter}' -> '#{name}'"
  end
end
|
156
|
+
|
118
157
|
# Register plugins (specific crawlers)
|
119
158
|
def register_plugins(dir)
|
120
159
|
if(@options[:verbose])
|
121
160
|
puts "Registering plugins - '#{dir}'"
|
122
161
|
end
|
123
162
|
|
124
|
-
|
125
|
-
Dir.glob(
|
126
|
-
require
|
163
|
+
files = File.join(dir, "**", "*.rb")
|
164
|
+
Dir.glob(files).each do |file|
|
165
|
+
require file
|
127
166
|
end
|
128
167
|
|
129
168
|
tmp = Apollo::Crawler::Plugins.constants.select { |c|
|
@@ -143,7 +182,7 @@ module Crawler
|
|
143
182
|
next
|
144
183
|
end
|
145
184
|
|
146
|
-
puts "Registered '#{plugin}' -> '#{name}'"
|
185
|
+
puts "Registered plugin '#{plugin}' -> '#{name}'"
|
147
186
|
end
|
148
187
|
end
|
149
188
|
end
|
@@ -219,6 +258,11 @@ module Crawler
|
|
219
258
|
register_plugins(dir)
|
220
259
|
end
|
221
260
|
|
261
|
+
# Register the available output formatters
|
262
|
+
@options[:formatter_dirs].each do |dir|
|
263
|
+
register_formatters(dir)
|
264
|
+
end
|
265
|
+
|
222
266
|
if(@options[:list_plugins])
|
223
267
|
headings = ['name', 'class']
|
224
268
|
rows = @plugins
|
@@ -257,7 +301,8 @@ module Crawler
|
|
257
301
|
next
|
258
302
|
end
|
259
303
|
|
260
|
-
puts
|
304
|
+
puts Apollo::Crawler::Formatters::Json.format(res)
|
305
|
+
# puts Apollo::Crawler::Formatters::Plain.format(res)
|
261
306
|
end
|
262
307
|
end
|
263
308
|
end
|
data/lib/apollo_crawler.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
# require 'apollo_crawler/plugin'
|
2
2
|
|
3
3
|
require 'apollo_crawler/crawler'
|
4
|
+
require 'apollo_crawler/formatter'
|
4
5
|
require 'apollo_crawler/plugin'
|
5
6
|
|
7
|
+
# Formatters
|
8
|
+
require 'apollo_crawler/formatters/formatter_plain'
|
9
|
+
|
10
|
+
# Plugins
|
6
11
|
require 'apollo_crawler/plugins/alexa_com/alexa'
|
7
12
|
require 'apollo_crawler/plugins/firmy_cz/firmy'
|
8
13
|
require 'apollo_crawler/plugins/slashdot_org/slashdot'
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'formatter')
|
4
|
+
|
5
|
+
module Apollo
|
6
|
+
module Crawler
|
7
|
+
module Formatters
|
8
|
+
# Formatter that renders an object as pretty-printed JSON.
class Json < Formatter
  # Instance-level wrapper around Json.format.
  #
  # Delegates to this class's own formatter so that an instance produces
  # JSON rather than the Plain formatter's output.
  #
  # obj - object to serialize.
  #
  # Returns the pretty-printed JSON string.
  def format(obj)
    return Json.format(obj)
  end

  # Serialize +obj+ with JSON.pretty_generate.
  #
  # obj - object to serialize.
  #
  # Returns the pretty-printed JSON string.
  def self.format(obj)
    return JSON.pretty_generate(obj)
  end
end
|
17
|
+
end # Formatters
|
18
|
+
end # Crawler
|
19
|
+
end # Apollo
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'awesome_print'
|
2
|
+
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'formatter')
|
4
|
+
|
5
|
+
module Apollo
|
6
|
+
module Crawler
|
7
|
+
module Formatters
|
8
|
+
# Formatter that renders an object using its #inspect representation.
class Plain < Formatter
  class << self
    # Returns +obj.inspect+.
    def format(obj)
      obj.inspect
    end
  end

  # Instance-level wrapper; returns the same string as Plain.format.
  def format(obj)
    Plain.format(obj)
  end
end
|
17
|
+
end # Formatters
|
18
|
+
end # Crawler
|
19
|
+
end # Apollo
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apollo-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.41
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -27,6 +27,22 @@ dependencies:
|
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: awesome_print
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
30
46
|
- !ruby/object:Gem::Dependency
|
31
47
|
name: active_support
|
32
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -210,8 +226,11 @@ executables:
|
|
210
226
|
extensions: []
|
211
227
|
extra_rdoc_files: []
|
212
228
|
files:
|
229
|
+
- ./lib/apollo_crawler/formatters/formatter_plain.rb
|
230
|
+
- ./lib/apollo_crawler/formatters/formatter_json.rb
|
213
231
|
- ./lib/apollo_crawler/version.rb
|
214
232
|
- ./lib/apollo_crawler/crawler.rb
|
233
|
+
- ./lib/apollo_crawler/formatter.rb
|
215
234
|
- ./lib/apollo_crawler/plugin_template.rb
|
216
235
|
- ./lib/apollo_crawler/plugins/xkcd_com/xkcd.rb
|
217
236
|
- ./lib/apollo_crawler/plugins/slashdot_org/slashdot.rb
|