apollo-crawler 0.0.26 → 0.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/apollo-crawler +21 -6
- data/lib/apollo_crawler/plugin_template.rb +6 -3
- data/lib/apollo_crawler/plugins/alexa_com/alexa.rb +5 -2
- data/lib/apollo_crawler/plugins/firmy_cz/firmy.rb +6 -1
- data/lib/apollo_crawler/plugins/slashdot_org/slashdot.rb +6 -1
- data/lib/apollo_crawler/plugins/ycombinator_com/hacker_news.rb +6 -1
- data/lib/apollo_crawler/version.rb +1 -1
- metadata +1 -1
data/bin/apollo-crawler
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
+
# encoding: utf-8
|
4
|
+
|
3
5
|
require "rubygems"
|
4
6
|
require "bundler/setup"
|
5
7
|
|
@@ -13,6 +15,10 @@ require "nokogiri"
|
|
13
15
|
require "pp"
|
14
16
|
require "optparse"
|
15
17
|
|
18
|
+
require 'active_support'
|
19
|
+
require 'active_support/inflector'
|
20
|
+
|
21
|
+
|
16
22
|
require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
|
17
23
|
|
18
24
|
module Crawler
|
@@ -141,7 +147,9 @@ module Crawler
|
|
141
147
|
end
|
142
148
|
end
|
143
149
|
|
144
|
-
def generate_plugin(name)
|
150
|
+
def generate_plugin(name, url = nil, matcher = nil)
|
151
|
+
name = name.titleize.gsub(" ", "")
|
152
|
+
|
145
153
|
if(@options[:verbose])
|
146
154
|
puts "Generating new plugin '#{name}'"
|
147
155
|
end
|
@@ -156,15 +164,19 @@ module Crawler
|
|
156
164
|
puts "Using template '#{template_path}'"
|
157
165
|
end
|
158
166
|
|
159
|
-
dest_path = File.join(Dir.pwd, "#{name}.rb")
|
167
|
+
dest_path = File.join(Dir.pwd, "#{name.underscore}.rb")
|
160
168
|
if(@options[:verbose])
|
161
169
|
puts "Generating '#{dest_path}'"
|
162
170
|
end
|
163
171
|
|
172
|
+
url = url ? url : "http://some-url-here"
|
173
|
+
matcher = matcher ? matcher : "//a"
|
174
|
+
|
164
175
|
placeholders = {
|
165
|
-
"
|
166
|
-
"
|
167
|
-
"
|
176
|
+
"PLUGIN_CLASS_NAME" => name,
|
177
|
+
"PLUGIN_NAME" => name.titleize,
|
178
|
+
"PLUGIN_URL" => url,
|
179
|
+
"PLUGIN_MATCHER" => matcher
|
168
180
|
}
|
169
181
|
|
170
182
|
File.open(template_path, 'r') do |tmpl|
|
@@ -195,7 +207,10 @@ module Crawler
|
|
195
207
|
|
196
208
|
if(@options[:generate_plugin])
|
197
209
|
name = @options[:generate_plugin]
|
198
|
-
|
210
|
+
url = ARGV.length > 0 ? ARGV[0] : nil
|
211
|
+
matcher = ARGV.length > 1 ? ARGV[1] : nil
|
212
|
+
|
213
|
+
self.generate_plugin(name, url, matcher)
|
199
214
|
exit
|
200
215
|
end
|
201
216
|
|
data/lib/apollo_crawler/plugin_template.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
require 'iconv'
|
1
2
|
|
2
3
|
module Apollo
|
3
4
|
module Crawler
|
4
5
|
module Plugins
|
5
6
|
# PARAMATRIZE: Plugin class name
|
6
|
-
class
|
7
|
+
class PLUGIN_CLASS_NAME < Plugin
|
7
8
|
@@URL = "PLUGIN_URL"
|
8
9
|
|
9
10
|
@@MATCHER_ITEM = "PLUGIN_MATCHER"
|
@@ -13,9 +14,11 @@ module Apollo
|
|
13
14
|
end
|
14
15
|
|
15
16
|
def run()
|
16
|
-
|
17
|
-
doc = Nokogiri::HTML(open(@@URL))
|
17
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
18
18
|
|
19
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
|
20
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
21
|
+
|
19
22
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
20
23
|
{
|
21
24
|
:text => i.text,
|
data/lib/apollo_crawler/plugins/alexa_com/alexa.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
1
3
|
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
2
4
|
|
3
5
|
module Apollo
|
@@ -14,9 +16,10 @@ module Apollo
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def run()
|
17
|
-
|
18
|
-
doc = Nokogiri::HTML(open(@@URL))
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
19
20
|
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
20
23
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
21
24
|
{
|
22
25
|
:text => i.text,
|
data/lib/apollo_crawler/plugins/firmy_cz/firmy.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
1
3
|
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
2
4
|
|
3
5
|
module Apollo
|
@@ -14,7 +16,10 @@ module Apollo
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def run()
|
17
|
-
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
20
|
+
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
18
23
|
|
19
24
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
20
25
|
{
|
data/lib/apollo_crawler/plugins/slashdot_org/slashdot.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
1
3
|
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
2
4
|
|
3
5
|
module Apollo
|
@@ -14,7 +16,10 @@ module Apollo
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def run()
|
17
|
-
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
20
|
+
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
18
23
|
|
19
24
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
20
25
|
{
|
data/lib/apollo_crawler/plugins/ycombinator_com/hacker_news.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
1
3
|
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
2
4
|
|
3
5
|
module Apollo
|
@@ -14,7 +16,10 @@ module Apollo
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def run()
|
17
|
-
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
20
|
+
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
18
23
|
|
19
24
|
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
20
25
|
{
|