apollo-crawler 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTA2ZTNjOTRiY2UzMjUxNmFiYjliOWYzMjA4MjZlMjAzZDQzNTgyZQ==
5
+ data.tar.gz: !binary |-
6
+ MTM3YzE5OTk0NWYyZTkwOTc3OWY3ZDUyMTE2YjA2ZjJiYjQyOGM5OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ OWY2ZTM0MDE0NzI3ZDkyODg1NTczYWVmZWIxODI5OGQyYzA4ZWM4MmI0YmRj
10
+ NjQxMTg3ZDgyZjBjZTA3ZTAxNWU0Mjc2YjUxMWE0N2MxNWI5NTc2MDU1OWMx
11
+ MDhlMDAwMzFmNGNlYTlhMzZmYjMxZDYxYWMyMjQxMTc1MThlY2Q=
12
+ data.tar.gz: !binary |-
13
+ ZGYyM2I3NjZlM2M1ZWIxMmZkOTI4NTkyMDZlNGMwZWU0NjdlMTM3NmZiYTA4
14
+ OTFlYjI3NDUzNGQxMzhjNDU4NzQzMjBlNmU3NDJkNjJmNmY1NDI0Yjc0MjBm
15
+ NzFjOGQ3NTlkNjRjYzkyYTRlNDczODI3N2UxODRhZDc3OTEwOTM=
@@ -1,404 +1,404 @@
1
- #! /usr/bin/env ruby
2
-
3
- # encoding: utf-8
4
-
5
- require "rubygems"
6
- require "bundler/setup"
7
-
8
- require 'json'
9
-
10
- require "thor"
11
-
12
- require "open-uri"
13
- require "nokogiri"
14
-
15
- require "pp"
16
- require "optparse"
17
-
18
- require 'active_support'
19
- require 'active_support/inflector'
20
-
21
- require 'terminal-table'
22
-
23
- require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
24
-
25
- module Apollo
26
- class CrawlerProgram
27
- @@CACHES_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "caches")
28
- @@CRAWLERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "crawlers")
29
- @@FORMATTERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "formatters")
30
- @@CRAWLER_TEMPLATE_NAME = "crawler_template.rb"
31
-
32
- # This hash will hold all of the options
33
- # parsed from the command-line by
34
- # OptionParser.
35
- @options = nil
36
- @optparser = nil
37
- @caches = nil
38
- @crawlers = nil
39
- @formatters = nil
40
- @formatter = nil
41
-
42
- # Initializer - Constructor
43
- def initialize
44
- @caches = {}
45
- @crawlers = {}
46
- @formatters = {}
47
- end
48
-
49
- # Initialize command-line options
50
- def init_options
51
- @options = {}
52
- @options[:verbose] = false
53
- @options[:version] = false
54
- @options[:cache_dirs] = [
55
- @@CACHES_DIR
56
- ]
57
- @options[:crawler_dirs] = [
58
- @@CRAWLERS_DIR
59
- ]
60
- @options[:formatter_dirs] = [
61
- @@FORMATTERS_DIR
62
- ]
63
- @options[:generate_crawler] = nil
64
-
65
- @optparser = OptionParser.new do | opts |
66
- # This displays the help screen, all programs are
67
- # assumed to have this option.
68
- opts.on('-h', '--help', 'Display this screen') do
69
- puts opts
70
- exit
71
- end
72
-
73
- opts.on('-a', '--all', 'Run all crawlers') do
74
- @options[:run_all] = true
75
- end
76
-
77
- opts.on('-f', '--format [NAME]', "Formatter used") do |name|
78
- @options[:formatter] = name
79
- end
80
-
81
- opts.on('-g', '--generate [NAME]', "Generate scaffold for new crawler") do |name|
82
- @options[:generate_crawler] = name
83
- end
84
-
85
- opts.on('-i', '--include [PATH]', 'Include additional crawler or crawler directory') do |path|
86
- @options[:crawler_dirs] << path
87
- end
88
-
89
- opts.on('-v', '--verbose', 'Enable verbose output') do
90
- @options[:verbose] = true
91
- end
92
-
93
- opts.on('-V', '--version', 'Show version info') do
94
- @options[:version] = true
95
- end
96
-
97
- opts.on('-l', '--list-crawlers', 'List of crawlers') do
98
- @options[:list_crawlers] = true
99
- end
100
-
101
- opts.on(nil, '--list-formatters', 'List of formatters available') do
102
- @options[:list_formatters] = true
103
- end
104
- end
105
- end
106
-
107
- # Parse the options passed to command-line
108
- def parse_options
109
- # Parse the command-line. Remember there are two forms
110
- # of the parse method. The 'parse' method simply parses
111
- # ARGV, while the 'parse!' method parses ARGV and removes
112
- # any options found there, as well as any parameters for
113
- # the options. What's left is the list of files to resize.
114
- @optparser.parse!
115
- end
116
-
117
- # Load global options first
118
- # Merge it with local options (if they exists)
119
- def load_config_file()
120
- config = File.join(File.dirname(__FILE__), "config", "crawler.rb")
121
- if(File.exists?(config))
122
- if(@options[:verbose])
123
- puts "Loading config '#{config}'"
124
- end
125
-
126
- # puts "Let's require '#{@options[:verbose]}'"
127
- require config
128
- else
129
- if(@options[:verbose])
130
- # TODO: Add support for initial rake task generation
131
- # Something like this:
132
- # rake config:init # Initializes config files with
133
- # their defaults (if not exists already)
134
- puts "Default config does not exist, skipping - '#{config}'"
135
- end
136
- end
137
- end
138
-
139
- # Register caches
140
- def register_cache(dir)
141
- if(@options[:verbose])
142
- puts "Registering caches - '#{dir}'"
143
- end
144
-
145
- files = File.join(dir, "**", "*.rb")
146
- Dir.glob(files).each do |file|
147
- require file
148
- end
149
-
150
- tmp = Apollo::Caches.constants.select { |c|
151
- Class === Apollo::Caches.const_get(c)
152
- }
153
-
154
- tmp.each do |x|
155
- klass = Object.const_get('Apollo').const_get('Caches').const_get(x)
156
- @caches.merge!({ x.downcase.to_s => klass})
157
- end
158
-
159
- if(@options[:verbose])
160
- @caches.each do |cache, klass|
161
- name = klass
162
-
163
- # klass.ancestors.include?(Apollo::Caches::Cache)
164
- if name == "Apollo::Caches::Cache"
165
- next
166
- end
167
-
168
- puts "Registered cache '#{cache}' -> '#{name}'"
169
- end
170
- end
171
- end
172
-
173
- # Register crawlers
174
- def register_crawlers(dir)
175
- if(@options[:verbose])
176
- puts "Registering crawlers - '#{dir}'"
177
- end
178
-
179
- files = File.join(dir, "**", "*.rb")
180
- Dir.glob(files).each do |file|
181
- require file
182
- end
183
-
184
- tmp = Apollo::Crawlers.constants.select { |c|
185
- Class === Apollo::Crawlers.const_get(c)
186
- }
187
-
188
- tmp.each do |x|
189
- klass = Object.const_get('Apollo').const_get('Crawlers').const_get(x)
190
- @crawlers.merge!({ x.downcase.to_s => klass})
191
- end
192
-
193
- if(@options[:verbose])
194
- @crawlers.each do |crawler, klass|
195
- name = klass.new.class.name
196
-
197
- if name == "Apollo::Crawlers::Crawler"
198
- next
199
- end
200
-
201
- puts "Registered crawler '#{crawler}' -> '#{name}'"
202
- end
203
- end
204
- end
205
-
206
- # Register formatters
207
- def register_formatters(dir)
208
- if(@options[:verbose])
209
- puts "Registering formatters - '#{dir}'"
210
- end
211
-
212
- files = File.join(dir, "**", "*.rb")
213
- Dir.glob(files).each do |file|
214
- require file
215
- end
216
-
217
- tmp = Apollo::Formatters.constants.select { |c|
218
- Class === Apollo::Formatters.const_get(c)
219
- }
220
-
221
- tmp.each do |x|
222
- klass = Object.const_get('Apollo').const_get('Formatters').const_get(x)
223
- @formatters.merge!({ x.downcase.to_s => klass})
224
- end
225
-
226
- if(@options[:verbose])
227
- @formatters.each do |formatter, klass|
228
- name = klass.new.class.name
229
-
230
- if name == "Apollo::Formatters::Formatter"
231
- next
232
- end
233
-
234
- puts "Registered formatter '#{formatter}' -> '#{name}'"
235
- end
236
- end
237
- end
238
-
239
- def generate_crawler(name, url = nil, matcher = nil)
240
- name = name.titleize.gsub(" ", "")
241
-
242
- if(@options[:verbose])
243
- puts "Generating new crawler '#{name}'"
244
- end
245
-
246
- template_path = File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', @@CRAWLER_TEMPLATE_NAME)
247
- if(File.exists?(template_path) == false)
248
- puts "Template file '#{template_path}' does not exists!"
249
- return
250
- end
251
-
252
- if(@options[:verbose])
253
- puts "Using template '#{template_path}'"
254
- end
255
-
256
- dest_path = File.join(Dir.pwd, "#{name.underscore}.rb")
257
-
258
- url = url ? url : "http://some-url-here"
259
- matcher = matcher ? matcher : "//a"
260
-
261
- placeholders = {
262
- "CRAWLER_CLASS_NAME" => name,
263
- "CRAWLER_NAME" => name.titleize,
264
- "CRAWLER_URL" => url,
265
- "CRAWLER_MATCHER" => matcher
266
- }
267
-
268
- puts "Generating crawler '#{name.titleize}', class: '#{name}', path: '#{dest_path}'"
269
-
270
- File.open(template_path, 'r') do |tmpl|
271
- File.open(dest_path, 'w') do |crawler|
272
- while line = tmpl.gets
273
- #puts line
274
- placeholders.each do |k, v|
275
- line.gsub!(k, v)
276
- end
277
-
278
- crawler.puts line
279
- end
280
- end
281
- end
282
- end
283
-
284
- def run
285
- init_options()
286
-
287
- parse_options()
288
-
289
- if(@options[:version])
290
- puts Apollo::VERSION
291
- exit
292
- end
293
-
294
- load_config_file()
295
-
296
- if(@options[:generate_crawler])
297
- name = @options[:generate_crawler]
298
- url = ARGV.length > 0 ? ARGV[0] : nil
299
- matcher = ARGV.length > 1 ? ARGV[1] : nil
300
-
301
- self.generate_crawler(name, url, matcher)
302
- exit
303
- end
304
-
305
- # Register caches which can be used
306
- @options[:cache_dirs].each do |dir|
307
- register_cache(dir)
308
- end
309
-
310
- # Register sites which can be crawled
311
- @options[:crawler_dirs].each do |dir|
312
- register_crawlers(dir)
313
- end
314
-
315
- # Register sites which can be crawled
316
- @options[:formatter_dirs].each do |dir|
317
- register_formatters(dir)
318
- end
319
-
320
- # Set default formatter here
321
- formatter_name = "json"
322
- if(@options[:formatter])
323
- formatter_name = @options[:formatter]
324
- end
325
-
326
- # Look for specified formatter
327
- f = @formatters.select { |k, v|
328
- k.downcase == formatter_name.downcase
329
- }
330
-
331
- if(f)
332
- @formatter = f[f.keys[0]]
333
- end
334
-
335
- if(@options[:list_formatters])
336
- headings = ['name', 'class']
337
- rows = @formatters
338
-
339
- table = Terminal::Table.new :headings => headings, :rows => rows
340
-
341
- puts table
342
- return
343
- end
344
-
345
- if(@options[:list_crawlers])
346
- headings = ['name', 'class']
347
- rows = @crawlers
348
-
349
- table = Terminal::Table.new :headings => headings, :rows => rows
350
-
351
- puts table
352
- return
353
- end
354
-
355
-
356
-
357
- crawlers = []
358
- if(ARGV.length > 0)
359
- crawlers << ARGV.shift
360
- end
361
-
362
- if(@options[:run_all])
363
- crawlers = @crawlers.keys
364
- end
365
-
366
- if(crawlers.empty?)
367
- puts @optparser
368
- exit
369
- end
370
-
371
- crawlers.each do |crawler|
372
- p = @crawlers[crawler.downcase]
373
- if(p == nil)
374
- puts "Invalid crawler name - '#{crawler}'"
375
- puts "See program help"
376
- next
377
- end
378
-
379
- if(@options[:verbose])
380
- puts "Running '#{crawler}'"
381
- end
382
-
383
- res = p.new.etl(ARGV)
384
- if(res.nil?)
385
- next
386
- end
387
-
388
- if(res.kind_of?(Array) == false)
389
- res = [res]
390
- end
391
-
392
- res.each do |tmp|
393
- puts @formatter.format(tmp)
394
- end
395
- end
396
- end
397
- end
398
- end
399
-
400
- if __FILE__ == $0
401
- Apollo::CrawlerProgram.new.run()
402
- else
403
- Apollo::CrawlerProgram.new.run()
404
- end
1
+ #! /usr/bin/env ruby
2
+
3
+ # encoding: utf-8
4
+
5
+ require "rubygems"
6
+ require "bundler/setup"
7
+
8
+ require 'json'
9
+
10
+ require "thor"
11
+
12
+ require "open-uri"
13
+ require "nokogiri"
14
+
15
+ require "pp"
16
+ require "optparse"
17
+
18
+ require 'active_support'
19
+ require 'active_support/inflector'
20
+
21
+ require 'terminal-table'
22
+
23
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
24
+
25
+ module Apollo
26
+ class CrawlerProgram
27
+ @@CACHES_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "caches")
28
+ @@CRAWLERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "crawlers")
29
+ @@FORMATTERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "formatters")
30
+ @@CRAWLER_TEMPLATE_NAME = "crawler_template.rb"
31
+
32
+ # This hash will hold all of the options
33
+ # parsed from the command-line by
34
+ # OptionParser.
35
+ @options = nil
36
+ @optparser = nil
37
+ @caches = nil
38
+ @crawlers = nil
39
+ @formatters = nil
40
+ @formatter = nil
41
+
42
+ # Initializer - Constructor
43
+ def initialize
44
+ @caches = {}
45
+ @crawlers = {}
46
+ @formatters = {}
47
+ end
48
+
49
+ # Initialize command-line options
50
+ def init_options
51
+ @options = {}
52
+ @options[:verbose] = false
53
+ @options[:version] = false
54
+ @options[:cache_dirs] = [
55
+ @@CACHES_DIR
56
+ ]
57
+ @options[:crawler_dirs] = [
58
+ @@CRAWLERS_DIR
59
+ ]
60
+ @options[:formatter_dirs] = [
61
+ @@FORMATTERS_DIR
62
+ ]
63
+ @options[:generate_crawler] = nil
64
+
65
+ @optparser = OptionParser.new do | opts |
66
+ # This displays the help screen, all programs are
67
+ # assumed to have this option.
68
+ opts.on('-h', '--help', 'Display this screen') do
69
+ puts opts
70
+ exit
71
+ end
72
+
73
+ opts.on('-a', '--all', 'Run all crawlers') do
74
+ @options[:run_all] = true
75
+ end
76
+
77
+ opts.on('-f', '--format [NAME]', "Formatter used") do |name|
78
+ @options[:formatter] = name
79
+ end
80
+
81
+ opts.on('-g', '--generate [NAME]', "Generate scaffold for new crawler") do |name|
82
+ @options[:generate_crawler] = name
83
+ end
84
+
85
+ opts.on('-i', '--include [PATH]', 'Include additional crawler or crawler directory') do |path|
86
+ @options[:crawler_dirs] << path
87
+ end
88
+
89
+ opts.on('-v', '--verbose', 'Enable verbose output') do
90
+ @options[:verbose] = true
91
+ end
92
+
93
+ opts.on('-V', '--version', 'Show version info') do
94
+ @options[:version] = true
95
+ end
96
+
97
+ opts.on('-l', '--list-crawlers', 'List of crawlers') do
98
+ @options[:list_crawlers] = true
99
+ end
100
+
101
+ opts.on(nil, '--list-formatters', 'List of formatters available') do
102
+ @options[:list_formatters] = true
103
+ end
104
+ end
105
+ end
106
+
107
+ # Parse the options passed to command-line
108
+ def parse_options
109
+ # Parse the command-line. Remember there are two forms
110
+ # of the parse method. The 'parse' method simply parses
111
+ # ARGV, while the 'parse!' method parses ARGV and removes
112
+ # any options found there, as well as any parameters for
113
+ # the options. What's left is the list of files to resize.
114
+ @optparser.parse!
115
+ end
116
+
117
+ # Load global options first
118
+ # Merge it with local options (if they exists)
119
+ def load_config_file()
120
+ config = File.join(File.dirname(__FILE__), "config", "crawler.rb")
121
+ if(File.exists?(config))
122
+ if(@options[:verbose])
123
+ puts "Loading config '#{config}'"
124
+ end
125
+
126
+ # puts "Let's require '#{@options[:verbose]}'"
127
+ require config
128
+ else
129
+ if(@options[:verbose])
130
+ # TODO: Add support for initial rake task generation
131
+ # Something like this:
132
+ # rake config:init # Initializes config files with
133
+ # their defaults (if not exists already)
134
+ puts "Default config does not exist, skipping - '#{config}'"
135
+ end
136
+ end
137
+ end
138
+
139
+ # Register caches
140
+ def register_cache(dir)
141
+ if(@options[:verbose])
142
+ puts "Registering caches - '#{dir}'"
143
+ end
144
+
145
+ files = File.join(dir, "**", "*.rb")
146
+ Dir.glob(files).each do |file|
147
+ require file
148
+ end
149
+
150
+ tmp = Apollo::Caches.constants.select { |c|
151
+ Class === Apollo::Caches.const_get(c)
152
+ }
153
+
154
+ tmp.each do |x|
155
+ klass = Object.const_get('Apollo').const_get('Caches').const_get(x)
156
+ @caches.merge!({ x.downcase.to_s => klass})
157
+ end
158
+
159
+ if(@options[:verbose])
160
+ @caches.each do |cache, klass|
161
+ name = klass
162
+
163
+ # klass.ancestors.include?(Apollo::Caches::Cache)
164
+ if name == "Apollo::Caches::Cache"
165
+ next
166
+ end
167
+
168
+ puts "Registered cache '#{cache}' -> '#{name}'"
169
+ end
170
+ end
171
+ end
172
+
173
+ # Register crawlers
174
+ def register_crawlers(dir)
175
+ if(@options[:verbose])
176
+ puts "Registering crawlers - '#{dir}'"
177
+ end
178
+
179
+ files = File.join(dir, "**", "*.rb")
180
+ Dir.glob(files).each do |file|
181
+ require file
182
+ end
183
+
184
+ tmp = Apollo::Crawlers.constants.select { |c|
185
+ Class === Apollo::Crawlers.const_get(c)
186
+ }
187
+
188
+ tmp.each do |x|
189
+ klass = Object.const_get('Apollo').const_get('Crawlers').const_get(x)
190
+ @crawlers.merge!({ x.downcase.to_s => klass})
191
+ end
192
+
193
+ if(@options[:verbose])
194
+ @crawlers.each do |crawler, klass|
195
+ name = klass.new.class.name
196
+
197
+ if name == "Apollo::Crawlers::Crawler"
198
+ next
199
+ end
200
+
201
+ puts "Registered crawler '#{crawler}' -> '#{name}'"
202
+ end
203
+ end
204
+ end
205
+
206
+ # Register formatters
207
+ def register_formatters(dir)
208
+ if(@options[:verbose])
209
+ puts "Registering formatters - '#{dir}'"
210
+ end
211
+
212
+ files = File.join(dir, "**", "*.rb")
213
+ Dir.glob(files).each do |file|
214
+ require file
215
+ end
216
+
217
+ tmp = Apollo::Formatters.constants.select { |c|
218
+ Class === Apollo::Formatters.const_get(c)
219
+ }
220
+
221
+ tmp.each do |x|
222
+ klass = Object.const_get('Apollo').const_get('Formatters').const_get(x)
223
+ @formatters.merge!({ x.downcase.to_s => klass})
224
+ end
225
+
226
+ if(@options[:verbose])
227
+ @formatters.each do |formatter, klass|
228
+ name = klass.new.class.name
229
+
230
+ if name == "Apollo::Formatters::Formatter"
231
+ next
232
+ end
233
+
234
+ puts "Registered formatter '#{formatter}' -> '#{name}'"
235
+ end
236
+ end
237
+ end
238
+
239
+ def generate_crawler(name, url = nil, matcher = nil)
240
+ name = name.titleize.gsub(" ", "")
241
+
242
+ if(@options[:verbose])
243
+ puts "Generating new crawler '#{name}'"
244
+ end
245
+
246
+ template_path = File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', @@CRAWLER_TEMPLATE_NAME)
247
+ if(File.exists?(template_path) == false)
248
+ puts "Template file '#{template_path}' does not exists!"
249
+ return
250
+ end
251
+
252
+ if(@options[:verbose])
253
+ puts "Using template '#{template_path}'"
254
+ end
255
+
256
+ dest_path = File.join(Dir.pwd, "#{name.underscore}.rb")
257
+
258
+ url = url ? url : "http://some-url-here"
259
+ matcher = matcher ? matcher : "//a"
260
+
261
+ placeholders = {
262
+ "CRAWLER_CLASS_NAME" => name,
263
+ "CRAWLER_NAME" => name.titleize,
264
+ "CRAWLER_URL" => url,
265
+ "CRAWLER_MATCHER" => matcher
266
+ }
267
+
268
+ puts "Generating crawler '#{name.titleize}', class: '#{name}', path: '#{dest_path}'"
269
+
270
+ File.open(template_path, 'r') do |tmpl|
271
+ File.open(dest_path, 'w') do |crawler|
272
+ while line = tmpl.gets
273
+ #puts line
274
+ placeholders.each do |k, v|
275
+ line.gsub!(k, v)
276
+ end
277
+
278
+ crawler.puts line
279
+ end
280
+ end
281
+ end
282
+ end
283
+
284
+ def run
285
+ init_options()
286
+
287
+ parse_options()
288
+
289
+ if(@options[:version])
290
+ puts Apollo::VERSION
291
+ exit
292
+ end
293
+
294
+ load_config_file()
295
+
296
+ if(@options[:generate_crawler])
297
+ name = @options[:generate_crawler]
298
+ url = ARGV.length > 0 ? ARGV[0] : nil
299
+ matcher = ARGV.length > 1 ? ARGV[1] : nil
300
+
301
+ self.generate_crawler(name, url, matcher)
302
+ exit
303
+ end
304
+
305
+ # Register caches which can be used
306
+ @options[:cache_dirs].each do |dir|
307
+ register_cache(dir)
308
+ end
309
+
310
+ # Register sites which can be crawled
311
+ @options[:crawler_dirs].each do |dir|
312
+ register_crawlers(dir)
313
+ end
314
+
315
+ # Register sites which can be crawled
316
+ @options[:formatter_dirs].each do |dir|
317
+ register_formatters(dir)
318
+ end
319
+
320
+ # Set default formatter here
321
+ formatter_name = "json"
322
+ if(@options[:formatter])
323
+ formatter_name = @options[:formatter]
324
+ end
325
+
326
+ # Look for specified formatter
327
+ f = @formatters.select { |k, v|
328
+ k.downcase == formatter_name.downcase
329
+ }
330
+
331
+ if(f)
332
+ @formatter = f[f.keys[0]]
333
+ end
334
+
335
+ if(@options[:list_formatters])
336
+ headings = ['name', 'class']
337
+ rows = @formatters
338
+
339
+ table = Terminal::Table.new :headings => headings, :rows => rows
340
+
341
+ puts table
342
+ return
343
+ end
344
+
345
+ if(@options[:list_crawlers])
346
+ headings = ['name', 'class']
347
+ rows = @crawlers
348
+
349
+ table = Terminal::Table.new :headings => headings, :rows => rows
350
+
351
+ puts table
352
+ return
353
+ end
354
+
355
+
356
+
357
+ crawlers = []
358
+ if(ARGV.length > 0)
359
+ crawlers << ARGV.shift
360
+ end
361
+
362
+ if(@options[:run_all])
363
+ crawlers = @crawlers.keys
364
+ end
365
+
366
+ if(crawlers.empty?)
367
+ puts @optparser
368
+ exit
369
+ end
370
+
371
+ crawlers.each do |crawler|
372
+ p = @crawlers[crawler.downcase]
373
+ if(p == nil)
374
+ puts "Invalid crawler name - '#{crawler}'"
375
+ puts "See program help"
376
+ next
377
+ end
378
+
379
+ if(@options[:verbose])
380
+ puts "Running '#{crawler}'"
381
+ end
382
+
383
+ res = p.new.etl(ARGV)
384
+ if(res.nil?)
385
+ next
386
+ end
387
+
388
+ if(res.kind_of?(Array) == false)
389
+ res = [res]
390
+ end
391
+
392
+ res.each do |tmp|
393
+ puts @formatter.format(tmp)
394
+ end
395
+ end
396
+ end
397
+ end
398
+ end
399
+
400
+ if __FILE__ == $0
401
+ Apollo::CrawlerProgram.new.run()
402
+ else
403
+ Apollo::CrawlerProgram.new.run()
404
+ end