apollo-crawler 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTA2ZTNjOTRiY2UzMjUxNmFiYjliOWYzMjA4MjZlMjAzZDQzNTgyZQ==
5
+ data.tar.gz: !binary |-
6
+ MTM3YzE5OTk0NWYyZTkwOTc3OWY3ZDUyMTE2YjA2ZjJiYjQyOGM5OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ OWY2ZTM0MDE0NzI3ZDkyODg1NTczYWVmZWIxODI5OGQyYzA4ZWM4MmI0YmRj
10
+ NjQxMTg3ZDgyZjBjZTA3ZTAxNWU0Mjc2YjUxMWE0N2MxNWI5NTc2MDU1OWMx
11
+ MDhlMDAwMzFmNGNlYTlhMzZmYjMxZDYxYWMyMjQxMTc1MThlY2Q=
12
+ data.tar.gz: !binary |-
13
+ ZGYyM2I3NjZlM2M1ZWIxMmZkOTI4NTkyMDZlNGMwZWU0NjdlMTM3NmZiYTA4
14
+ OTFlYjI3NDUzNGQxMzhjNDU4NzQzMjBlNmU3NDJkNjJmNmY1NDI0Yjc0MjBm
15
+ NzFjOGQ3NTlkNjRjYzkyYTRlNDczODI3N2UxODRhZDc3OTEwOTM=
@@ -1,404 +1,404 @@
1
- #! /usr/bin/env ruby
2
-
3
- # encoding: utf-8
4
-
5
- require "rubygems"
6
- require "bundler/setup"
7
-
8
- require 'json'
9
-
10
- require "thor"
11
-
12
- require "open-uri"
13
- require "nokogiri"
14
-
15
- require "pp"
16
- require "optparse"
17
-
18
- require 'active_support'
19
- require 'active_support/inflector'
20
-
21
- require 'terminal-table'
22
-
23
- require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
24
-
25
- module Apollo
26
- class CrawlerProgram
27
- @@CACHES_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "caches")
28
- @@CRAWLERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "crawlers")
29
- @@FORMATTERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "formatters")
30
- @@CRAWLER_TEMPLATE_NAME = "crawler_template.rb"
31
-
32
- # This hash will hold all of the options
33
- # parsed from the command-line by
34
- # OptionParser.
35
- @options = nil
36
- @optparser = nil
37
- @caches = nil
38
- @crawlers = nil
39
- @formatters = nil
40
- @formatter = nil
41
-
42
- # Initializer - Constructor
43
- def initialize
44
- @caches = {}
45
- @crawlers = {}
46
- @formatters = {}
47
- end
48
-
49
- # Initialize command-line options
50
- def init_options
51
- @options = {}
52
- @options[:verbose] = false
53
- @options[:version] = false
54
- @options[:cache_dirs] = [
55
- @@CACHES_DIR
56
- ]
57
- @options[:crawler_dirs] = [
58
- @@CRAWLERS_DIR
59
- ]
60
- @options[:formatter_dirs] = [
61
- @@FORMATTERS_DIR
62
- ]
63
- @options[:generate_crawler] = nil
64
-
65
- @optparser = OptionParser.new do | opts |
66
- # This displays the help screen, all programs are
67
- # assumed to have this option.
68
- opts.on('-h', '--help', 'Display this screen') do
69
- puts opts
70
- exit
71
- end
72
-
73
- opts.on('-a', '--all', 'Run all crawlers') do
74
- @options[:run_all] = true
75
- end
76
-
77
- opts.on('-f', '--format [NAME]', "Formatter used") do |name|
78
- @options[:formatter] = name
79
- end
80
-
81
- opts.on('-g', '--generate [NAME]', "Generate scaffold for new crawler") do |name|
82
- @options[:generate_crawler] = name
83
- end
84
-
85
- opts.on('-i', '--include [PATH]', 'Include additional crawler or crawler directory') do |path|
86
- @options[:crawler_dirs] << path
87
- end
88
-
89
- opts.on('-v', '--verbose', 'Enable verbose output') do
90
- @options[:verbose] = true
91
- end
92
-
93
- opts.on('-V', '--version', 'Show version info') do
94
- @options[:version] = true
95
- end
96
-
97
- opts.on('-l', '--list-crawlers', 'List of crawlers') do
98
- @options[:list_crawlers] = true
99
- end
100
-
101
- opts.on(nil, '--list-formatters', 'List of formatters available') do
102
- @options[:list_formatters] = true
103
- end
104
- end
105
- end
106
-
107
- # Parse the options passed to command-line
108
- def parse_options
109
- # Parse the command-line. Remember there are two forms
110
- # of the parse method. The 'parse' method simply parses
111
- # ARGV, while the 'parse!' method parses ARGV and removes
112
- # any options found there, as well as any parameters for
113
- # the options. What's left is the list of files to resize.
114
- @optparser.parse!
115
- end
116
-
117
- # Load global options first
118
- # Merge it with local options (if they exists)
119
- def load_config_file()
120
- config = File.join(File.dirname(__FILE__), "config", "crawler.rb")
121
- if(File.exists?(config))
122
- if(@options[:verbose])
123
- puts "Loading config '#{config}'"
124
- end
125
-
126
- # puts "Let's require '#{@options[:verbose]}'"
127
- require config
128
- else
129
- if(@options[:verbose])
130
- # TODO: Add support for initial rake task generation
131
- # Something like this:
132
- # rake config:init # Initializes config files with
133
- # their defaults (if not exists already)
134
- puts "Default config does not exist, skipping - '#{config}'"
135
- end
136
- end
137
- end
138
-
139
- # Register caches
140
- def register_cache(dir)
141
- if(@options[:verbose])
142
- puts "Registering caches - '#{dir}'"
143
- end
144
-
145
- files = File.join(dir, "**", "*.rb")
146
- Dir.glob(files).each do |file|
147
- require file
148
- end
149
-
150
- tmp = Apollo::Caches.constants.select { |c|
151
- Class === Apollo::Caches.const_get(c)
152
- }
153
-
154
- tmp.each do |x|
155
- klass = Object.const_get('Apollo').const_get('Caches').const_get(x)
156
- @caches.merge!({ x.downcase.to_s => klass})
157
- end
158
-
159
- if(@options[:verbose])
160
- @caches.each do |cache, klass|
161
- name = klass
162
-
163
- # klass.ancestors.include?(Apollo::Caches::Cache)
164
- if name == "Apollo::Caches::Cache"
165
- next
166
- end
167
-
168
- puts "Registered cache '#{cache}' -> '#{name}'"
169
- end
170
- end
171
- end
172
-
173
- # Register crawlers
174
- def register_crawlers(dir)
175
- if(@options[:verbose])
176
- puts "Registering crawlers - '#{dir}'"
177
- end
178
-
179
- files = File.join(dir, "**", "*.rb")
180
- Dir.glob(files).each do |file|
181
- require file
182
- end
183
-
184
- tmp = Apollo::Crawlers.constants.select { |c|
185
- Class === Apollo::Crawlers.const_get(c)
186
- }
187
-
188
- tmp.each do |x|
189
- klass = Object.const_get('Apollo').const_get('Crawlers').const_get(x)
190
- @crawlers.merge!({ x.downcase.to_s => klass})
191
- end
192
-
193
- if(@options[:verbose])
194
- @crawlers.each do |crawler, klass|
195
- name = klass.new.class.name
196
-
197
- if name == "Apollo::Crawlers::Crawler"
198
- next
199
- end
200
-
201
- puts "Registered crawler '#{crawler}' -> '#{name}'"
202
- end
203
- end
204
- end
205
-
206
- # Register formatters
207
- def register_formatters(dir)
208
- if(@options[:verbose])
209
- puts "Registering formatters - '#{dir}'"
210
- end
211
-
212
- files = File.join(dir, "**", "*.rb")
213
- Dir.glob(files).each do |file|
214
- require file
215
- end
216
-
217
- tmp = Apollo::Formatters.constants.select { |c|
218
- Class === Apollo::Formatters.const_get(c)
219
- }
220
-
221
- tmp.each do |x|
222
- klass = Object.const_get('Apollo').const_get('Formatters').const_get(x)
223
- @formatters.merge!({ x.downcase.to_s => klass})
224
- end
225
-
226
- if(@options[:verbose])
227
- @formatters.each do |formatter, klass|
228
- name = klass.new.class.name
229
-
230
- if name == "Apollo::Formatters::Formatter"
231
- next
232
- end
233
-
234
- puts "Registered formatter '#{formatter}' -> '#{name}'"
235
- end
236
- end
237
- end
238
-
239
- def generate_crawler(name, url = nil, matcher = nil)
240
- name = name.titleize.gsub(" ", "")
241
-
242
- if(@options[:verbose])
243
- puts "Generating new crawler '#{name}'"
244
- end
245
-
246
- template_path = File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', @@CRAWLER_TEMPLATE_NAME)
247
- if(File.exists?(template_path) == false)
248
- puts "Template file '#{template_path}' does not exists!"
249
- return
250
- end
251
-
252
- if(@options[:verbose])
253
- puts "Using template '#{template_path}'"
254
- end
255
-
256
- dest_path = File.join(Dir.pwd, "#{name.underscore}.rb")
257
-
258
- url = url ? url : "http://some-url-here"
259
- matcher = matcher ? matcher : "//a"
260
-
261
- placeholders = {
262
- "CRAWLER_CLASS_NAME" => name,
263
- "CRAWLER_NAME" => name.titleize,
264
- "CRAWLER_URL" => url,
265
- "CRAWLER_MATCHER" => matcher
266
- }
267
-
268
- puts "Generating crawler '#{name.titleize}', class: '#{name}', path: '#{dest_path}'"
269
-
270
- File.open(template_path, 'r') do |tmpl|
271
- File.open(dest_path, 'w') do |crawler|
272
- while line = tmpl.gets
273
- #puts line
274
- placeholders.each do |k, v|
275
- line.gsub!(k, v)
276
- end
277
-
278
- crawler.puts line
279
- end
280
- end
281
- end
282
- end
283
-
284
- def run
285
- init_options()
286
-
287
- parse_options()
288
-
289
- if(@options[:version])
290
- puts Apollo::VERSION
291
- exit
292
- end
293
-
294
- load_config_file()
295
-
296
- if(@options[:generate_crawler])
297
- name = @options[:generate_crawler]
298
- url = ARGV.length > 0 ? ARGV[0] : nil
299
- matcher = ARGV.length > 1 ? ARGV[1] : nil
300
-
301
- self.generate_crawler(name, url, matcher)
302
- exit
303
- end
304
-
305
- # Register caches which can be used
306
- @options[:cache_dirs].each do |dir|
307
- register_cache(dir)
308
- end
309
-
310
- # Register sites which can be crawled
311
- @options[:crawler_dirs].each do |dir|
312
- register_crawlers(dir)
313
- end
314
-
315
- # Register sites which can be crawled
316
- @options[:formatter_dirs].each do |dir|
317
- register_formatters(dir)
318
- end
319
-
320
- # Set default formatter here
321
- formatter_name = "json"
322
- if(@options[:formatter])
323
- formatter_name = @options[:formatter]
324
- end
325
-
326
- # Look for specified formatter
327
- f = @formatters.select { |k, v|
328
- k.downcase == formatter_name.downcase
329
- }
330
-
331
- if(f)
332
- @formatter = f[f.keys[0]]
333
- end
334
-
335
- if(@options[:list_formatters])
336
- headings = ['name', 'class']
337
- rows = @formatters
338
-
339
- table = Terminal::Table.new :headings => headings, :rows => rows
340
-
341
- puts table
342
- return
343
- end
344
-
345
- if(@options[:list_crawlers])
346
- headings = ['name', 'class']
347
- rows = @crawlers
348
-
349
- table = Terminal::Table.new :headings => headings, :rows => rows
350
-
351
- puts table
352
- return
353
- end
354
-
355
-
356
-
357
- crawlers = []
358
- if(ARGV.length > 0)
359
- crawlers << ARGV.shift
360
- end
361
-
362
- if(@options[:run_all])
363
- crawlers = @crawlers.keys
364
- end
365
-
366
- if(crawlers.empty?)
367
- puts @optparser
368
- exit
369
- end
370
-
371
- crawlers.each do |crawler|
372
- p = @crawlers[crawler.downcase]
373
- if(p == nil)
374
- puts "Invalid crawler name - '#{crawler}'"
375
- puts "See program help"
376
- next
377
- end
378
-
379
- if(@options[:verbose])
380
- puts "Running '#{crawler}'"
381
- end
382
-
383
- res = p.new.etl(ARGV)
384
- if(res.nil?)
385
- next
386
- end
387
-
388
- if(res.kind_of?(Array) == false)
389
- res = [res]
390
- end
391
-
392
- res.each do |tmp|
393
- puts @formatter.format(tmp)
394
- end
395
- end
396
- end
397
- end
398
- end
399
-
400
- if __FILE__ == $0
401
- Apollo::CrawlerProgram.new.run()
402
- else
403
- Apollo::CrawlerProgram.new.run()
404
- end
1
+ #! /usr/bin/env ruby
2
+
3
+ # encoding: utf-8
4
+
5
+ require "rubygems"
6
+ require "bundler/setup"
7
+
8
+ require 'json'
9
+
10
+ require "thor"
11
+
12
+ require "open-uri"
13
+ require "nokogiri"
14
+
15
+ require "pp"
16
+ require "optparse"
17
+
18
+ require 'active_support'
19
+ require 'active_support/inflector'
20
+
21
+ require 'terminal-table'
22
+
23
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
24
+
25
+ module Apollo
26
+ class CrawlerProgram
27
+ @@CACHES_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "caches")
28
+ @@CRAWLERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "crawlers")
29
+ @@FORMATTERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "formatters")
30
+ @@CRAWLER_TEMPLATE_NAME = "crawler_template.rb"
31
+
32
+ # This hash will hold all of the options
33
+ # parsed from the command-line by
34
+ # OptionParser.
35
+ @options = nil
36
+ @optparser = nil
37
+ @caches = nil
38
+ @crawlers = nil
39
+ @formatters = nil
40
+ @formatter = nil
41
+
42
+ # Initializer - Constructor
43
+ def initialize
44
+ @caches = {}
45
+ @crawlers = {}
46
+ @formatters = {}
47
+ end
48
+
49
+ # Initialize command-line options
50
+ def init_options
51
+ @options = {}
52
+ @options[:verbose] = false
53
+ @options[:version] = false
54
+ @options[:cache_dirs] = [
55
+ @@CACHES_DIR
56
+ ]
57
+ @options[:crawler_dirs] = [
58
+ @@CRAWLERS_DIR
59
+ ]
60
+ @options[:formatter_dirs] = [
61
+ @@FORMATTERS_DIR
62
+ ]
63
+ @options[:generate_crawler] = nil
64
+
65
+ @optparser = OptionParser.new do | opts |
66
+ # This displays the help screen, all programs are
67
+ # assumed to have this option.
68
+ opts.on('-h', '--help', 'Display this screen') do
69
+ puts opts
70
+ exit
71
+ end
72
+
73
+ opts.on('-a', '--all', 'Run all crawlers') do
74
+ @options[:run_all] = true
75
+ end
76
+
77
+ opts.on('-f', '--format [NAME]', "Formatter used") do |name|
78
+ @options[:formatter] = name
79
+ end
80
+
81
+ opts.on('-g', '--generate [NAME]', "Generate scaffold for new crawler") do |name|
82
+ @options[:generate_crawler] = name
83
+ end
84
+
85
+ opts.on('-i', '--include [PATH]', 'Include additional crawler or crawler directory') do |path|
86
+ @options[:crawler_dirs] << path
87
+ end
88
+
89
+ opts.on('-v', '--verbose', 'Enable verbose output') do
90
+ @options[:verbose] = true
91
+ end
92
+
93
+ opts.on('-V', '--version', 'Show version info') do
94
+ @options[:version] = true
95
+ end
96
+
97
+ opts.on('-l', '--list-crawlers', 'List of crawlers') do
98
+ @options[:list_crawlers] = true
99
+ end
100
+
101
+ opts.on(nil, '--list-formatters', 'List of formatters available') do
102
+ @options[:list_formatters] = true
103
+ end
104
+ end
105
+ end
106
+
107
+ # Parse the options passed to command-line
108
+ def parse_options
109
+ # Parse the command-line. Remember there are two forms
110
+ # of the parse method. The 'parse' method simply parses
111
+ # ARGV, while the 'parse!' method parses ARGV and removes
112
+ # any options found there, as well as any parameters for
113
+ # the options. What's left is the list of files to resize.
114
+ @optparser.parse!
115
+ end
116
+
117
+ # Load global options first
118
+ # Merge it with local options (if they exists)
119
+ def load_config_file()
120
+ config = File.join(File.dirname(__FILE__), "config", "crawler.rb")
121
+ if(File.exists?(config))
122
+ if(@options[:verbose])
123
+ puts "Loading config '#{config}'"
124
+ end
125
+
126
+ # puts "Let's require '#{@options[:verbose]}'"
127
+ require config
128
+ else
129
+ if(@options[:verbose])
130
+ # TODO: Add support for initial rake task generation
131
+ # Something like this:
132
+ # rake config:init # Initializes config files with
133
+ # their defaults (if not exists already)
134
+ puts "Default config does not exist, skipping - '#{config}'"
135
+ end
136
+ end
137
+ end
138
+
139
+ # Register caches
140
+ def register_cache(dir)
141
+ if(@options[:verbose])
142
+ puts "Registering caches - '#{dir}'"
143
+ end
144
+
145
+ files = File.join(dir, "**", "*.rb")
146
+ Dir.glob(files).each do |file|
147
+ require file
148
+ end
149
+
150
+ tmp = Apollo::Caches.constants.select { |c|
151
+ Class === Apollo::Caches.const_get(c)
152
+ }
153
+
154
+ tmp.each do |x|
155
+ klass = Object.const_get('Apollo').const_get('Caches').const_get(x)
156
+ @caches.merge!({ x.downcase.to_s => klass})
157
+ end
158
+
159
+ if(@options[:verbose])
160
+ @caches.each do |cache, klass|
161
+ name = klass
162
+
163
+ # klass.ancestors.include?(Apollo::Caches::Cache)
164
+ if name == "Apollo::Caches::Cache"
165
+ next
166
+ end
167
+
168
+ puts "Registered cache '#{cache}' -> '#{name}'"
169
+ end
170
+ end
171
+ end
172
+
173
+ # Register crawlers
174
+ def register_crawlers(dir)
175
+ if(@options[:verbose])
176
+ puts "Registering crawlers - '#{dir}'"
177
+ end
178
+
179
+ files = File.join(dir, "**", "*.rb")
180
+ Dir.glob(files).each do |file|
181
+ require file
182
+ end
183
+
184
+ tmp = Apollo::Crawlers.constants.select { |c|
185
+ Class === Apollo::Crawlers.const_get(c)
186
+ }
187
+
188
+ tmp.each do |x|
189
+ klass = Object.const_get('Apollo').const_get('Crawlers').const_get(x)
190
+ @crawlers.merge!({ x.downcase.to_s => klass})
191
+ end
192
+
193
+ if(@options[:verbose])
194
+ @crawlers.each do |crawler, klass|
195
+ name = klass.new.class.name
196
+
197
+ if name == "Apollo::Crawlers::Crawler"
198
+ next
199
+ end
200
+
201
+ puts "Registered crawler '#{crawler}' -> '#{name}'"
202
+ end
203
+ end
204
+ end
205
+
206
+ # Register formatters
207
+ def register_formatters(dir)
208
+ if(@options[:verbose])
209
+ puts "Registering formatters - '#{dir}'"
210
+ end
211
+
212
+ files = File.join(dir, "**", "*.rb")
213
+ Dir.glob(files).each do |file|
214
+ require file
215
+ end
216
+
217
+ tmp = Apollo::Formatters.constants.select { |c|
218
+ Class === Apollo::Formatters.const_get(c)
219
+ }
220
+
221
+ tmp.each do |x|
222
+ klass = Object.const_get('Apollo').const_get('Formatters').const_get(x)
223
+ @formatters.merge!({ x.downcase.to_s => klass})
224
+ end
225
+
226
+ if(@options[:verbose])
227
+ @formatters.each do |formatter, klass|
228
+ name = klass.new.class.name
229
+
230
+ if name == "Apollo::Formatters::Formatter"
231
+ next
232
+ end
233
+
234
+ puts "Registered formatter '#{formatter}' -> '#{name}'"
235
+ end
236
+ end
237
+ end
238
+
239
+ def generate_crawler(name, url = nil, matcher = nil)
240
+ name = name.titleize.gsub(" ", "")
241
+
242
+ if(@options[:verbose])
243
+ puts "Generating new crawler '#{name}'"
244
+ end
245
+
246
+ template_path = File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', @@CRAWLER_TEMPLATE_NAME)
247
+ if(File.exists?(template_path) == false)
248
+ puts "Template file '#{template_path}' does not exists!"
249
+ return
250
+ end
251
+
252
+ if(@options[:verbose])
253
+ puts "Using template '#{template_path}'"
254
+ end
255
+
256
+ dest_path = File.join(Dir.pwd, "#{name.underscore}.rb")
257
+
258
+ url = url ? url : "http://some-url-here"
259
+ matcher = matcher ? matcher : "//a"
260
+
261
+ placeholders = {
262
+ "CRAWLER_CLASS_NAME" => name,
263
+ "CRAWLER_NAME" => name.titleize,
264
+ "CRAWLER_URL" => url,
265
+ "CRAWLER_MATCHER" => matcher
266
+ }
267
+
268
+ puts "Generating crawler '#{name.titleize}', class: '#{name}', path: '#{dest_path}'"
269
+
270
+ File.open(template_path, 'r') do |tmpl|
271
+ File.open(dest_path, 'w') do |crawler|
272
+ while line = tmpl.gets
273
+ #puts line
274
+ placeholders.each do |k, v|
275
+ line.gsub!(k, v)
276
+ end
277
+
278
+ crawler.puts line
279
+ end
280
+ end
281
+ end
282
+ end
283
+
284
+ def run
285
+ init_options()
286
+
287
+ parse_options()
288
+
289
+ if(@options[:version])
290
+ puts Apollo::VERSION
291
+ exit
292
+ end
293
+
294
+ load_config_file()
295
+
296
+ if(@options[:generate_crawler])
297
+ name = @options[:generate_crawler]
298
+ url = ARGV.length > 0 ? ARGV[0] : nil
299
+ matcher = ARGV.length > 1 ? ARGV[1] : nil
300
+
301
+ self.generate_crawler(name, url, matcher)
302
+ exit
303
+ end
304
+
305
+ # Register caches which can be used
306
+ @options[:cache_dirs].each do |dir|
307
+ register_cache(dir)
308
+ end
309
+
310
+ # Register sites which can be crawled
311
+ @options[:crawler_dirs].each do |dir|
312
+ register_crawlers(dir)
313
+ end
314
+
315
+ # Register sites which can be crawled
316
+ @options[:formatter_dirs].each do |dir|
317
+ register_formatters(dir)
318
+ end
319
+
320
+ # Set default formatter here
321
+ formatter_name = "json"
322
+ if(@options[:formatter])
323
+ formatter_name = @options[:formatter]
324
+ end
325
+
326
+ # Look for specified formatter
327
+ f = @formatters.select { |k, v|
328
+ k.downcase == formatter_name.downcase
329
+ }
330
+
331
+ if(f)
332
+ @formatter = f[f.keys[0]]
333
+ end
334
+
335
+ if(@options[:list_formatters])
336
+ headings = ['name', 'class']
337
+ rows = @formatters
338
+
339
+ table = Terminal::Table.new :headings => headings, :rows => rows
340
+
341
+ puts table
342
+ return
343
+ end
344
+
345
+ if(@options[:list_crawlers])
346
+ headings = ['name', 'class']
347
+ rows = @crawlers
348
+
349
+ table = Terminal::Table.new :headings => headings, :rows => rows
350
+
351
+ puts table
352
+ return
353
+ end
354
+
355
+
356
+
357
+ crawlers = []
358
+ if(ARGV.length > 0)
359
+ crawlers << ARGV.shift
360
+ end
361
+
362
+ if(@options[:run_all])
363
+ crawlers = @crawlers.keys
364
+ end
365
+
366
+ if(crawlers.empty?)
367
+ puts @optparser
368
+ exit
369
+ end
370
+
371
+ crawlers.each do |crawler|
372
+ p = @crawlers[crawler.downcase]
373
+ if(p == nil)
374
+ puts "Invalid crawler name - '#{crawler}'"
375
+ puts "See program help"
376
+ next
377
+ end
378
+
379
+ if(@options[:verbose])
380
+ puts "Running '#{crawler}'"
381
+ end
382
+
383
+ res = p.new.etl(ARGV)
384
+ if(res.nil?)
385
+ next
386
+ end
387
+
388
+ if(res.kind_of?(Array) == false)
389
+ res = [res]
390
+ end
391
+
392
+ res.each do |tmp|
393
+ puts @formatter.format(tmp)
394
+ end
395
+ end
396
+ end
397
+ end
398
+ end
399
+
400
+ if __FILE__ == $0
401
+ Apollo::CrawlerProgram.new.run()
402
+ else
403
+ Apollo::CrawlerProgram.new.run()
404
+ end