apollo-crawler 0.0.26 → 0.0.28
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/apollo-crawler +21 -6
- data/lib/apollo_crawler/plugin_template.rb +6 -3
- data/lib/apollo_crawler/plugins/alexa_com/alexa.rb +5 -2
- data/lib/apollo_crawler/plugins/firmy_cz/firmy.rb +6 -1
- data/lib/apollo_crawler/plugins/slashdot_org/slashdot.rb +6 -1
- data/lib/apollo_crawler/plugins/ycombinator_com/hacker_news.rb +6 -1
- data/lib/apollo_crawler/version.rb +1 -1
- metadata +1 -1
data/bin/apollo-crawler
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
+
# encoding: utf-8
|
4
|
+
|
3
5
|
require "rubygems"
|
4
6
|
require "bundler/setup"
|
5
7
|
|
@@ -13,6 +15,10 @@ require "nokogiri"
|
|
13
15
|
require "pp"
|
14
16
|
require "optparse"
|
15
17
|
|
18
|
+
require 'active_support'
|
19
|
+
require 'active_support/inflector'
|
20
|
+
|
21
|
+
|
16
22
|
require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
|
17
23
|
|
18
24
|
module Crawler
|
@@ -141,7 +147,9 @@ module Crawler
|
|
141
147
|
end
|
142
148
|
end
|
143
149
|
|
144
|
-
def generate_plugin(name)
|
150
|
+
def generate_plugin(name, url = nil, matcher = nil)
|
151
|
+
name = name.titleize.gsub(" ", "")
|
152
|
+
|
145
153
|
if(@options[:verbose])
|
146
154
|
puts "Generating new plugin '#{name}'"
|
147
155
|
end
|
@@ -156,15 +164,19 @@ module Crawler
|
|
156
164
|
puts "Using template '#{template_path}'"
|
157
165
|
end
|
158
166
|
|
159
|
-
dest_path = File.join(Dir.pwd, "#{name}.rb")
|
167
|
+
dest_path = File.join(Dir.pwd, "#{name.underscore}.rb")
|
160
168
|
if(@options[:verbose])
|
161
169
|
puts "Generating '#{dest_path}'"
|
162
170
|
end
|
163
171
|
|
172
|
+
url = url ? url : "http://some-url-here"
|
173
|
+
matcher = matcher ? matcher : "//a"
|
174
|
+
|
164
175
|
placeholders = {
|
165
|
-
"
|
166
|
-
"
|
167
|
-
"
|
176
|
+
"PLUGIN_CLASS_NAME" => name,
|
177
|
+
"PLUGIN_NAME" => name.titleize,
|
178
|
+
"PLUGIN_URL" => url,
|
179
|
+
"PLUGIN_MATCHER" => matcher
|
168
180
|
}
|
169
181
|
|
170
182
|
File.open(template_path, 'r') do |tmpl|
|
@@ -195,7 +207,10 @@ module Crawler
|
|
195
207
|
|
196
208
|
if(@options[:generate_plugin])
|
197
209
|
name = @options[:generate_plugin]
|
198
|
-
|
210
|
+
url = ARGV.length > 0 ? ARGV[0] : nil
|
211
|
+
matcher = ARGV.length > 1 ? ARGV[1] : nil
|
212
|
+
|
213
|
+
self.generate_plugin(name, url, matcher)
|
199
214
|
exit
|
200
215
|
end
|
201
216
|
|
@@ -1,9 +1,10 @@
|
|
1
|
+
require 'iconv'
|
1
2
|
|
2
3
|
module Apollo
|
3
4
|
module Crawler
|
4
5
|
module Plugins
|
5
6
|
# PARAMETRIZE: Plugin class name
|
6
|
-
class
|
7
|
+
class PLUGIN_CLASS_NAME < Plugin
|
7
8
|
@@URL = "PLUGIN_URL"
|
8
9
|
|
9
10
|
@@MATCHER_ITEM = "PLUGIN_MATCHER"
|
@@ -13,9 +14,11 @@ module Apollo
|
|
13
14
|
end
|
14
15
|
|
15
16
|
def run()
|
16
|
-
|
17
|
-
doc = Nokogiri::HTML(open(@@URL))
|
17
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
18
18
|
|
19
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
|
20
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
21
|
+
|
19
22
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
20
23
|
{
|
21
24
|
:text => i.text,
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
1
3
|
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
2
4
|
|
3
5
|
module Apollo
|
@@ -14,9 +16,10 @@ module Apollo
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def run()
|
17
|
-
|
18
|
-
doc = Nokogiri::HTML(open(@@URL))
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
19
20
|
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
20
23
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
21
24
|
{
|
22
25
|
:text => i.text,
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
1
3
|
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
2
4
|
|
3
5
|
module Apollo
|
@@ -14,7 +16,10 @@ module Apollo
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def run()
|
17
|
-
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
20
|
+
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
18
23
|
|
19
24
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
20
25
|
{
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
1
3
|
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
2
4
|
|
3
5
|
module Apollo
|
@@ -14,7 +16,10 @@ module Apollo
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def run()
|
17
|
-
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
20
|
+
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
18
23
|
|
19
24
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
20
25
|
{
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
1
3
|
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
2
4
|
|
3
5
|
module Apollo
|
@@ -14,7 +16,10 @@ module Apollo
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def run()
|
17
|
-
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
20
|
+
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
18
23
|
|
19
24
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
20
25
|
{
|