apollo-crawler 0.0.40 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/apollo-crawler CHANGED
@@ -25,6 +25,7 @@ require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'versio
25
25
  module Crawler
26
26
  class Program
27
27
  @@PLUGIN_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "plugins")
28
+ @@FORMATTERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "formatters")
28
29
  @@PLUGIN_TEMPLATE_NAME = "plugin_template.rb"
29
30
 
30
31
  # This hash will hold all of the options
@@ -33,10 +34,12 @@ module Crawler
33
34
  @options = nil
34
35
  @optparser = nil
35
36
  @plugins = nil
37
+ @formatters = nil
36
38
 
37
39
  # Initializer - Constructor
38
40
  def initialize
39
41
  @plugins = {}
42
+ @formatters = {}
40
43
  end
41
44
 
42
45
  # Initialize command-line options
@@ -47,6 +50,9 @@ module Crawler
47
50
  @options[:plugin_dirs] = [
48
51
  @@PLUGIN_DIR
49
52
  ]
53
+ @options[:formatter_dirs] = [
54
+ @@FORMATTERS_DIR
55
+ ]
50
56
  @options[:generate_plugin] = nil
51
57
 
52
58
  @optparser = OptionParser.new do | opts |
@@ -115,15 +121,48 @@ module Crawler
115
121
  end
116
122
  end
117
123
 
124
# Register formatters
#
# Requires every Ruby file found (recursively) under +dir+, then records each
# class defined directly in Apollo::Crawler::Formatters in @formatters, keyed
# by the lower-cased constant name (e.g. "json" => Formatters::Json).
#
# dir - directory that is scanned for formatter source files
#
# When @options[:verbose] is set, progress is printed to stdout.
def register_formatters(dir)
  puts "Registering formatters - '#{dir}'" if @options[:verbose]

  # Sorted so the load order does not depend on the filesystem; expanded so a
  # relative directory is loaded from disk instead of searched on $LOAD_PATH.
  Dir.glob(File.join(dir, "**", "*.rb")).sort.each do |file|
    require File.expand_path(file)
  end

  namespace = Apollo::Crawler::Formatters
  namespace.constants.each do |const|
    klass = namespace.const_get(const)
    # Only classes are formatters; skip any other constant in the namespace.
    @formatters[const.to_s.downcase] = klass if klass.is_a?(Class)
  end

  return unless @options[:verbose]

  @formatters.each do |formatter, klass|
    # The Formatter base class is registered too, but is left out of the listing.
    next if klass.name == "Apollo::Crawler::Formatters::Formatter"

    puts "Registered formatter '#{formatter}' -> '#{klass.name}'"
  end
end
156
+
118
157
  # Register plugins (specific crawlers)
119
158
  def register_plugins(dir)
120
159
  if(@options[:verbose])
121
160
  puts "Registering plugins - '#{dir}'"
122
161
  end
123
162
 
124
- sites = File.join(dir, "**", "*.rb")
125
- Dir.glob(sites).each do |site|
126
- require site
163
+ files = File.join(dir, "**", "*.rb")
164
+ Dir.glob(files).each do |file|
165
+ require file
127
166
  end
128
167
 
129
168
  tmp = Apollo::Crawler::Plugins.constants.select { |c|
@@ -143,7 +182,7 @@ module Crawler
143
182
  next
144
183
  end
145
184
 
146
- puts "Registered '#{plugin}' -> '#{name}'"
185
+ puts "Registered plugin '#{plugin}' -> '#{name}'"
147
186
  end
148
187
  end
149
188
  end
@@ -219,6 +258,11 @@ module Crawler
219
258
  register_plugins(dir)
220
259
  end
221
260
 
261
+ # Register formatters from each configured formatter directory
262
+ @options[:formatter_dirs].each do |dir|
263
+ register_formatters(dir)
264
+ end
265
+
222
266
  if(@options[:list_plugins])
223
267
  headings = ['name', 'class']
224
268
  rows = @plugins
@@ -257,7 +301,8 @@ module Crawler
257
301
  next
258
302
  end
259
303
 
260
- puts JSON.pretty_generate(res)
304
+ puts Apollo::Crawler::Formatters::Json.format(res)
305
+ # puts Apollo::Crawler::Formatters::Plain.format(res)
261
306
  end
262
307
  end
263
308
  end
@@ -1,8 +1,13 @@
1
1
  # require 'apollo_crawler/plugin'
2
2
 
3
3
  require 'apollo_crawler/crawler'
4
+ require 'apollo_crawler/formatter'
4
5
  require 'apollo_crawler/plugin'
5
6
 
7
+ # Formatters
8
+ require 'apollo_crawler/formatters/formatter_plain'
9
+
10
+ # Plugins
6
11
  require 'apollo_crawler/plugins/alexa_com/alexa'
7
12
  require 'apollo_crawler/plugins/firmy_cz/firmy'
8
13
  require 'apollo_crawler/plugins/slashdot_org/slashdot'
@@ -0,0 +1,8 @@
1
module Apollo
  module Crawler
    module Formatters
      # Base class for output formatters.
      #
      # A formatter turns an object into a String through +format+, available
      # both on the class and on instances. Defining the contract here keeps a
      # call on the base class (or on a subclass that forgot to implement it)
      # from falling through to the private Kernel#format.
      class Formatter
        # Format +obj+ as a String. Subclasses are expected to override this.
        #
        # obj - the object to render
        #
        # Raises NotImplementedError when the subclass does not override it.
        def self.format(obj)
          raise NotImplementedError, "#{name} must implement .format"
        end

        # Instance-level convenience; delegates to the class-level +format+
        # of the receiver's own class.
        def format(obj)
          self.class.format(obj)
        end
      end
    end # Formatters
  end # Crawler
end # Apollo
@@ -0,0 +1,19 @@
1
+ require 'json'
2
+
3
+ require File.join(File.dirname(__FILE__), '..', 'formatter')
4
+
5
module Apollo
  module Crawler
    module Formatters
      # Formatter that renders an object as pretty-printed JSON.
      class Json < Formatter
        # Instance-level entry point; delegates to Json.format so that
        # instances and the class produce the same JSON output.
        #
        # obj - the object to render
        #
        # Returns the JSON text as a String.
        def format(obj)
          Json.format(obj)
        end

        # Render +obj+ with JSON.pretty_generate.
        #
        # obj - the object to render
        #
        # Returns the JSON text as a String.
        def self.format(obj)
          JSON.pretty_generate(obj)
        end
      end
    end # Formatters
  end # Crawler
end # Apollo
@@ -0,0 +1,19 @@
1
+ require 'awesome_print'
2
+
3
+ require File.join(File.dirname(__FILE__), '..', 'formatter')
4
+
5
module Apollo
  module Crawler
    module Formatters
      # Formatter that renders an object using its +inspect+ representation.
      class Plain < Formatter
        class << self
          # Render +obj+ as the String returned by obj.inspect.
          def format(obj)
            obj.inspect
          end
        end

        # Instance-level entry point; forwards to Plain.format.
        def format(obj)
          Plain.format(obj)
        end
      end
    end # Formatters
  end # Crawler
end # Apollo
@@ -1,5 +1,5 @@
1
1
  module Apollo
2
2
  module Crawler
3
- VERSION = '0.0.40'
3
+ VERSION = '0.0.41'
4
4
  end # Crawler
5
5
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.40
4
+ version: 0.0.41
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: awesome_print
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: active_support
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -210,8 +226,11 @@ executables:
210
226
  extensions: []
211
227
  extra_rdoc_files: []
212
228
  files:
229
+ - ./lib/apollo_crawler/formatters/formatter_plain.rb
230
+ - ./lib/apollo_crawler/formatters/formatter_json.rb
213
231
  - ./lib/apollo_crawler/version.rb
214
232
  - ./lib/apollo_crawler/crawler.rb
233
+ - ./lib/apollo_crawler/formatter.rb
215
234
  - ./lib/apollo_crawler/plugin_template.rb
216
235
  - ./lib/apollo_crawler/plugins/xkcd_com/xkcd.rb
217
236
  - ./lib/apollo_crawler/plugins/slashdot_org/slashdot.rb