apollo-crawler 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/apollo-crawler CHANGED
@@ -1,12 +1,170 @@
1
- #!/usr/bin/env ruby
2
-
3
- begin
4
- require 'apollo_crawler'
5
- require 'launchy'
6
- rescue LoadError
7
- require 'rubygems'
8
- require 'apollo_crawler'
9
- require 'launchy'
1
+ #! /usr/bin/env ruby
2
+
3
+ require "rubygems"
4
+ require "bundler/setup"
5
+
6
+ require 'json'
7
+
8
+ require "thor"
9
+
10
+ require "open-uri"
11
+ require "nokogiri"
12
+
13
+ require "pp"
14
+ require "optparse"
15
+
16
+ module Crawler
17
+ class Program
18
+ # This hash will hold all of the options
19
+ # parsed from the command-line by
20
+ # OptionParser.
21
+ @options = nil
22
+ @optparser = nil
23
+ @plugins = nil
24
+
25
+ # Initializer - Constructor
26
+ def initialize
27
+ @plugins = {}
28
+ end
29
+
30
+ # Initialize command-line options
31
+ def init_options
32
+ @options = {}
33
+ @options[:verbose] = false
34
+
35
+ @optparser = OptionParser.new do | opts |
36
+ # This displays the help screen, all programs are
37
+ # assumed to have this option.
38
+ opts.on('-h', '--help', 'Display this screen') do
39
+ puts opts
40
+ exit
41
+ end
42
+
43
+ opts.on('-a', '--all', 'Run all plugins') do
44
+ @options[:run_all] = true
45
+ end
46
+
47
+ opts.on('-v', '--verbose', 'Enable verbose output') do
48
+ @options[:verbose] = true
49
+ end
50
+
51
+ opts.on('-l', '--list-plugins', 'List of plugins') do
52
+ @options[:list_plugins] = true
53
+ end
54
+ end
55
+ end
56
+
57
+ # Parse the options passed to command-line
58
+ def parse_options
59
+ # Parse the command-line. Remember there are two forms
60
+ # of the parse method. The 'parse' method simply parses
61
+ # ARGV, while the 'parse!' method parses ARGV and removes
62
+ # any options found there, as well as any parameters for
63
+ # the options. What's left is the list of files to resize.
64
+ @optparser.parse!
65
+ end
66
+
67
+ # Load global options first
68
+ # Merge it with local options (if they exists)
69
+ def load_config_file()
70
+ config = File.join(File.dirname(__FILE__), "config", "crawler.rb")
71
+ puts "Inspecting #{config} ..."
72
+ if(File.exists?(config))
73
+ if(@options[:verbose])
74
+ puts "Loading config '#{config}'"
75
+ end
76
+
77
+ # puts "Let's require '#{@options[:verbose]}'"
78
+ require config
79
+ else
80
+ if(@options[:verbose])
81
+ # TODO: Add support for initial rake task generation
82
+ # Something like this:
83
+ # rake config:init # Initializes config files with
84
+ # their defaults (if not exists already)
85
+ puts "Default config does not exist, skipping - '#{config}'"
86
+ end
87
+ end
88
+ end
89
+
90
+ # Register plugins (specific crawlers)
91
+ def register_plugins()
92
+ dir = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "plugins")
93
+ if(@options[:verbose])
94
+ puts "Registering plugins - '#{dir}'"
95
+ end
96
+
97
+ sites = File.join(dir, "**", "*.rb")
98
+ Dir.glob(sites).each do |site|
99
+ require site
100
+ end
101
+
102
+ tmp = Apollo::Crawler::Plugins.constants.select { |c|
103
+ Class === Apollo::Crawler::Plugins.const_get(c)
104
+ }
105
+
106
+ tmp.each do |x|
107
+ klass = Object.const_get('Apollo').const_get('Crawler').const_get('Plugins').const_get(x)
108
+ @plugins.merge!({ x.downcase.to_s => klass})
109
+ end
110
+
111
+ if(@options[:verbose])
112
+ @plugins.each do |plugin, klass|
113
+ name = klass.new.class.name
114
+
115
+ if name == "Apollo::Crawler::Plugins::Plugin"
116
+ next
117
+ end
118
+
119
+ puts "Registered '#{plugin}' -> '#{name}'"
120
+ end
121
+ end
122
+ end
123
+
124
+ def run
125
+ init_options()
126
+
127
+ load_config_file()
128
+
129
+ parse_options()
130
+
131
+ # Register sites which can be crawled
132
+ register_plugins()
133
+
134
+ if(@options[:list_plugins])
135
+ puts "Listing plugins"
136
+ puts "----------------------------------------"
137
+ i = 0
138
+ @plugins.sort.each do |plugin, klass|
139
+ instance = klass.new
140
+ # puts klass.class_eval("@@NAME")
141
+ puts "(#{i}) #{plugin} - #{instance.name}"
142
+ i += 1
143
+ end
144
+ puts "----------------------------------------"
145
+ return
146
+ end
147
+
148
+ plugins = ARGV
149
+
150
+ if(@options[:run_all])
151
+ plugins = @plugins.keys
152
+ end
153
+
154
+ if(plugins.empty?)
155
+ puts @optparser
156
+ end
157
+
158
+ plugins.each do |plugin|
159
+ p = @plugins[plugin.downcase].new
160
+
161
+ # puts "Running '#{plugin}'"
162
+ puts JSON.pretty_generate(p.run)
163
+ end
164
+ end
165
+ end
10
166
  end
11
167
 
12
- Launchy::Cli.new.run( ARGV, ENV )
168
+ if __FILE__ == $0
169
+ Crawler::Program.new.run()
170
+ end
@@ -1,5 +1,5 @@
1
1
  module Apollo
2
2
  module Crawler
3
- VERSION = '0.0.17'
3
+ VERSION = '0.0.18'
4
4
  end # Crawler
5
5
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.18
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -18,7 +18,6 @@ executables:
18
18
  extensions: []
19
19
  extra_rdoc_files: []
20
20
  files:
21
- - ./main.rb
22
21
  - ./lib/apollo_crawler/version.rb
23
22
  - ./lib/apollo_crawler/crawler.rb
24
23
  - ./lib/apollo_crawler/plugins/slashdot_org/slashdot.rb
data/main.rb DELETED
@@ -1,170 +0,0 @@
1
- #! /usr/bin/env ruby
2
-
3
- require "rubygems"
4
- require "bundler/setup"
5
-
6
- require 'json'
7
-
8
- require "thor"
9
-
10
- require "open-uri"
11
- require "nokogiri"
12
-
13
- require "pp"
14
- require "optparse"
15
-
16
- module Crawler
17
- class Program
18
- # This hash will hold all of the options
19
- # parsed from the command-line by
20
- # OptionParser.
21
- @options = nil
22
- @optparser = nil
23
- @plugins = nil
24
-
25
- # Initializer - Constructor
26
- def initialize
27
- @plugins = {}
28
- end
29
-
30
- # Initialize command-line options
31
- def init_options
32
- @options = {}
33
- @options[:verbose] = false
34
-
35
- @optparser = OptionParser.new do | opts |
36
- # This displays the help screen, all programs are
37
- # assumed to have this option.
38
- opts.on('-h', '--help', 'Display this screen') do
39
- puts opts
40
- exit
41
- end
42
-
43
- opts.on('-a', '--all', 'Run all plugins') do
44
- @options[:run_all] = true
45
- end
46
-
47
- opts.on('-v', '--verbose', 'Enable verbose output') do
48
- @options[:verbose] = true
49
- end
50
-
51
- opts.on('-l', '--list-plugins', 'List of plugins') do
52
- @options[:list_plugins] = true
53
- end
54
- end
55
- end
56
-
57
- # Parse the options passed to command-line
58
- def parse_options
59
- # Parse the command-line. Remember there are two forms
60
- # of the parse method. The 'parse' method simply parses
61
- # ARGV, while the 'parse!' method parses ARGV and removes
62
- # any options found there, as well as any parameters for
63
- # the options. What's left is the list of files to resize.
64
- @optparser.parse!
65
- end
66
-
67
- # Load global options first
68
- # Merge it with local options (if they exists)
69
- def load_config_file()
70
- config = File.join(File.dirname(__FILE__), "config", "crawler.rb")
71
- puts "Inspecting #{config} ..."
72
- if(File.exists?(config))
73
- if(@options[:verbose])
74
- puts "Loading config '#{config}'"
75
- end
76
-
77
- # puts "Let's require '#{@options[:verbose]}'"
78
- require config
79
- else
80
- if(@options[:verbose])
81
- # TODO: Add support for initial rake task generation
82
- # Something like this:
83
- # rake config:init # Initializes config files with
84
- # their defaults (if not exists already)
85
- puts "Default config does not exist, skipping - '#{config}'"
86
- end
87
- end
88
- end
89
-
90
- # Register plugins (specific crawlers)
91
- def register_plugins()
92
- dir = File.join(File.dirname(__FILE__), "lib", "apollo_crawler", "plugins")
93
- if(@options[:verbose])
94
- puts "Registering plugins - '#{dir}'"
95
- end
96
-
97
- sites = File.join(dir, "**", "*.rb")
98
- Dir.glob(sites).each do |site|
99
- require site
100
- end
101
-
102
- tmp = Apollo::Crawler::Plugins.constants.select { |c|
103
- Class === Apollo::Crawler::Plugins.const_get(c)
104
- }
105
-
106
- tmp.each do |x|
107
- klass = Object.const_get('Apollo').const_get('Crawler').const_get('Plugins').const_get(x)
108
- @plugins.merge!({ x.downcase.to_s => klass})
109
- end
110
-
111
- if(@options[:verbose])
112
- @plugins.each do |plugin, klass|
113
- name = klass.new.class.name
114
-
115
- if name == "Apollo::Crawler::Plugins::Plugin"
116
- next
117
- end
118
-
119
- puts "Registered '#{plugin}' -> '#{name}'"
120
- end
121
- end
122
- end
123
-
124
- def run
125
- init_options()
126
-
127
- load_config_file()
128
-
129
- parse_options()
130
-
131
- # Register sites which can be crawled
132
- register_plugins()
133
-
134
- if(@options[:list_plugins])
135
- puts "Listing plugins"
136
- puts "----------------------------------------"
137
- i = 0
138
- @plugins.sort.each do |plugin, klass|
139
- instance = klass.new
140
- # puts klass.class_eval("@@NAME")
141
- puts "(#{i}) #{plugin} - #{instance.name}"
142
- i += 1
143
- end
144
- puts "----------------------------------------"
145
- return
146
- end
147
-
148
- plugins = ARGV
149
-
150
- if(@options[:run_all])
151
- plugins = @plugins.keys
152
- end
153
-
154
- if(plugins.empty?)
155
- puts @optparser
156
- end
157
-
158
- plugins.each do |plugin|
159
- p = @plugins[plugin.downcase].new
160
-
161
- # puts "Running '#{plugin}'"
162
- puts JSON.pretty_generate(p.run)
163
- end
164
- end
165
- end
166
- end
167
-
168
- if __FILE__ == $0
169
- Crawler::Program.new.run()
170
- end