apollo-crawler 0.0.17 → 0.0.18

Sign up to get free protection for your applications and to get access to all the features.
data/bin/apollo-crawler CHANGED
@@ -1,12 +1,170 @@
1
- #!/usr/bin/env ruby
2
-
3
- begin
4
- require 'apollo_crawler'
5
- require 'launchy'
6
- rescue LoadError
7
- require 'rubygems'
8
- require 'apollo_crawler'
9
- require 'launchy'
1
+ #! /usr/bin/env ruby
2
+
3
+ require "rubygems"
4
+ require "bundler/setup"
5
+
6
+ require 'json'
7
+
8
+ require "thor"
9
+
10
+ require "open-uri"
11
+ require "nokogiri"
12
+
13
+ require "pp"
14
+ require "optparse"
15
+
16
+ module Crawler
17
+ class Program
18
+ # This hash will hold all of the options
19
+ # parsed from the command-line by
20
+ # OptionParser.
21
+ @options = nil
22
+ @optparser = nil
23
+ @plugins = nil
24
+
25
+ # Initializer - Constructor
26
+ def initialize
27
+ @plugins = {}
28
+ end
29
+
30
+ # Initialize command-line options
31
+ def init_options
32
+ @options = {}
33
+ @options[:verbose] = false
34
+
35
+ @optparser = OptionParser.new do | opts |
36
+ # This displays the help screen, all programs are
37
+ # assumed to have this option.
38
+ opts.on('-h', '--help', 'Display this screen') do
39
+ puts opts
40
+ exit
41
+ end
42
+
43
+ opts.on('-a', '--all', 'Run all plugins') do
44
+ @options[:run_all] = true
45
+ end
46
+
47
+ opts.on('-v', '--verbose', 'Enable verbose output') do
48
+ @options[:verbose] = true
49
+ end
50
+
51
+ opts.on('-l', '--list-plugins', 'List of plugins') do
52
+ @options[:list_plugins] = true
53
+ end
54
+ end
55
+ end
56
+
57
+ # Parse the options passed to command-line
58
+ def parse_options
59
+ # Parse the command-line. Remember there are two forms
60
+ # of the parse method. The 'parse' method simply parses
61
+ # ARGV, while the 'parse!' method parses ARGV and removes
62
+ # any options found there, as well as any parameters for
63
+ # the options. What's left is the list of plugin names to run.
64
+ @optparser.parse!
65
+ end
66
+
67
+ # Load global options first
68
+ # Merge it with local options (if they exist)
69
+ def load_config_file()
70
+ config = File.join(File.dirname(__FILE__), "config", "crawler.rb")
71
+ puts "Inspecting #{config} ..."
72
+ if(File.exists?(config))
73
+ if(@options[:verbose])
74
+ puts "Loading config '#{config}'"
75
+ end
76
+
77
+ # puts "Let's require '#{@options[:verbose]}'"
78
+ require config
79
+ else
80
+ if(@options[:verbose])
81
+ # TODO: Add support for initial rake task generation
82
+ # Something like this:
83
+ # rake config:init # Initializes config files with
84
+ # their defaults (if they do not already exist)
85
+ puts "Default config does not exist, skipping - '#{config}'"
86
+ end
87
+ end
88
+ end
89
+
90
+ # Register plugins (specific crawlers)
91
+ def register_plugins()
92
+ dir = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "plugins")
93
+ if(@options[:verbose])
94
+ puts "Registering plugins - '#{dir}'"
95
+ end
96
+
97
+ sites = File.join(dir, "**", "*.rb")
98
+ Dir.glob(sites).each do |site|
99
+ require site
100
+ end
101
+
102
+ tmp = Apollo::Crawler::Plugins.constants.select { |c|
103
+ Class === Apollo::Crawler::Plugins.const_get(c)
104
+ }
105
+
106
+ tmp.each do |x|
107
+ klass = Object.const_get('Apollo').const_get('Crawler').const_get('Plugins').const_get(x)
108
+ @plugins.merge!({ x.downcase.to_s => klass})
109
+ end
110
+
111
+ if(@options[:verbose])
112
+ @plugins.each do |plugin, klass|
113
+ name = klass.new.class.name
114
+
115
+ if name == "Apollo::Crawler::Plugins::Plugin"
116
+ next
117
+ end
118
+
119
+ puts "Registered '#{plugin}' -> '#{name}'"
120
+ end
121
+ end
122
+ end
123
+
124
+ def run
125
+ init_options()
126
+
127
+ load_config_file()
128
+
129
+ parse_options()
130
+
131
+ # Register sites which can be crawled
132
+ register_plugins()
133
+
134
+ if(@options[:list_plugins])
135
+ puts "Listing plugins"
136
+ puts "----------------------------------------"
137
+ i = 0
138
+ @plugins.sort.each do |plugin, klass|
139
+ instance = klass.new
140
+ # puts klass.class_eval("@@NAME")
141
+ puts "(#{i}) #{plugin} - #{instance.name}"
142
+ i += 1
143
+ end
144
+ puts "----------------------------------------"
145
+ return
146
+ end
147
+
148
+ plugins = ARGV
149
+
150
+ if(@options[:run_all])
151
+ plugins = @plugins.keys
152
+ end
153
+
154
+ if(plugins.empty?)
155
+ puts @optparser
156
+ end
157
+
158
+ plugins.each do |plugin|
159
+ p = @plugins[plugin.downcase].new
160
+
161
+ # puts "Running '#{plugin}'"
162
+ puts JSON.pretty_generate(p.run)
163
+ end
164
+ end
165
+ end
10
166
  end
11
167
 
12
- Launchy::Cli.new.run( ARGV, ENV )
168
+ if __FILE__ == $0
169
+ Crawler::Program.new.run()
170
+ end
@@ -1,5 +1,5 @@
1
1
  module Apollo
2
2
  module Crawler
3
- VERSION = '0.0.17'
3
+ VERSION = '0.0.18'
4
4
  end # Crawler
5
5
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.18
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -18,7 +18,6 @@ executables:
18
18
  extensions: []
19
19
  extra_rdoc_files: []
20
20
  files:
21
- - ./main.rb
22
21
  - ./lib/apollo_crawler/version.rb
23
22
  - ./lib/apollo_crawler/crawler.rb
24
23
  - ./lib/apollo_crawler/plugins/slashdot_org/slashdot.rb
data/main.rb DELETED
@@ -1,170 +0,0 @@
1
- #! /usr/bin/env ruby
2
-
3
- require "rubygems"
4
- require "bundler/setup"
5
-
6
- require 'json'
7
-
8
- require "thor"
9
-
10
- require "open-uri"
11
- require "nokogiri"
12
-
13
- require "pp"
14
- require "optparse"
15
-
16
- module Crawler
17
- class Program
18
- # This hash will hold all of the options
19
- # parsed from the command-line by
20
- # OptionParser.
21
- @options = nil
22
- @optparser = nil
23
- @plugins = nil
24
-
25
- # Initializer - Constructor
26
- def initialize
27
- @plugins = {}
28
- end
29
-
30
- # Initialize command-line options
31
- def init_options
32
- @options = {}
33
- @options[:verbose] = false
34
-
35
- @optparser = OptionParser.new do | opts |
36
- # This displays the help screen, all programs are
37
- # assumed to have this option.
38
- opts.on('-h', '--help', 'Display this screen') do
39
- puts opts
40
- exit
41
- end
42
-
43
- opts.on('-a', '--all', 'Run all plugins') do
44
- @options[:run_all] = true
45
- end
46
-
47
- opts.on('-v', '--verbose', 'Enable verbose output') do
48
- @options[:verbose] = true
49
- end
50
-
51
- opts.on('-l', '--list-plugins', 'List of plugins') do
52
- @options[:list_plugins] = true
53
- end
54
- end
55
- end
56
-
57
- # Parse the options passed to command-line
58
- def parse_options
59
- # Parse the command-line. Remember there are two forms
60
- # of the parse method. The 'parse' method simply parses
61
- # ARGV, while the 'parse!' method parses ARGV and removes
62
- # any options found there, as well as any parameters for
63
- # the options. What's left is the list of plugin names to run.
64
- @optparser.parse!
65
- end
66
-
67
- # Load global options first
68
- # Merge it with local options (if they exist)
69
- def load_config_file()
70
- config = File.join(File.dirname(__FILE__), "config", "crawler.rb")
71
- puts "Inspecting #{config} ..."
72
- if(File.exists?(config))
73
- if(@options[:verbose])
74
- puts "Loading config '#{config}'"
75
- end
76
-
77
- # puts "Let's require '#{@options[:verbose]}'"
78
- require config
79
- else
80
- if(@options[:verbose])
81
- # TODO: Add support for initial rake task generation
82
- # Something like this:
83
- # rake config:init # Initializes config files with
84
- # their defaults (if they do not already exist)
85
- puts "Default config does not exist, skipping - '#{config}'"
86
- end
87
- end
88
- end
89
-
90
- # Register plugins (specific crawlers)
91
- def register_plugins()
92
- dir = File.join(File.dirname(__FILE__), "lib", "apollo_crawler", "plugins")
93
- if(@options[:verbose])
94
- puts "Registering plugins - '#{dir}'"
95
- end
96
-
97
- sites = File.join(dir, "**", "*.rb")
98
- Dir.glob(sites).each do |site|
99
- require site
100
- end
101
-
102
- tmp = Apollo::Crawler::Plugins.constants.select { |c|
103
- Class === Apollo::Crawler::Plugins.const_get(c)
104
- }
105
-
106
- tmp.each do |x|
107
- klass = Object.const_get('Apollo').const_get('Crawler').const_get('Plugins').const_get(x)
108
- @plugins.merge!({ x.downcase.to_s => klass})
109
- end
110
-
111
- if(@options[:verbose])
112
- @plugins.each do |plugin, klass|
113
- name = klass.new.class.name
114
-
115
- if name == "Apollo::Crawler::Plugins::Plugin"
116
- next
117
- end
118
-
119
- puts "Registered '#{plugin}' -> '#{name}'"
120
- end
121
- end
122
- end
123
-
124
- def run
125
- init_options()
126
-
127
- load_config_file()
128
-
129
- parse_options()
130
-
131
- # Register sites which can be crawled
132
- register_plugins()
133
-
134
- if(@options[:list_plugins])
135
- puts "Listing plugins"
136
- puts "----------------------------------------"
137
- i = 0
138
- @plugins.sort.each do |plugin, klass|
139
- instance = klass.new
140
- # puts klass.class_eval("@@NAME")
141
- puts "(#{i}) #{plugin} - #{instance.name}"
142
- i += 1
143
- end
144
- puts "----------------------------------------"
145
- return
146
- end
147
-
148
- plugins = ARGV
149
-
150
- if(@options[:run_all])
151
- plugins = @plugins.keys
152
- end
153
-
154
- if(plugins.empty?)
155
- puts @optparser
156
- end
157
-
158
- plugins.each do |plugin|
159
- p = @plugins[plugin.downcase].new
160
-
161
- # puts "Running '#{plugin}'"
162
- puts JSON.pretty_generate(p.run)
163
- end
164
- end
165
- end
166
- end
167
-
168
- if __FILE__ == $0
169
- Crawler::Program.new.run()
170
- end