apollo-crawler 0.0.26 → 0.0.28

Sign up to get free protection for your applications and to get access to all the features.
data/bin/apollo-crawler CHANGED
@@ -1,5 +1,7 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
+ # encoding: utf-8
4
+
3
5
  require "rubygems"
4
6
  require "bundler/setup"
5
7
 
@@ -13,6 +15,10 @@ require "nokogiri"
13
15
  require "pp"
14
16
  require "optparse"
15
17
 
18
+ require 'active_support'
19
+ require 'active_support/inflector'
20
+
21
+
16
22
  require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
17
23
 
18
24
  module Crawler
@@ -141,7 +147,9 @@ module Crawler
141
147
  end
142
148
  end
143
149
 
144
- def generate_plugin(name)
150
+ def generate_plugin(name, url = nil, matcher = nil)
151
+ name = name.titleize.gsub(" ", "")
152
+
145
153
  if(@options[:verbose])
146
154
  puts "Generating new plugin '#{name}'"
147
155
  end
@@ -156,15 +164,19 @@ module Crawler
156
164
  puts "Using template '#{template_path}'"
157
165
  end
158
166
 
159
- dest_path = File.join(Dir.pwd, "#{name}.rb")
167
+ dest_path = File.join(Dir.pwd, "#{name.underscore}.rb")
160
168
  if(@options[:verbose])
161
169
  puts "Generating '#{dest_path}'"
162
170
  end
163
171
 
172
+ url = url ? url : "http://some-url-here"
173
+ matcher = matcher ? matcher : "//a"
174
+
164
175
  placeholders = {
165
- "PLUGIN_NAME" => name,
166
- "PLUGIN_URL" => "http://some-url-here",
167
- "PLUGIN_MATCHER" => "//a"
176
+ "PLUGIN_CLASS_NAME" => name,
177
+ "PLUGIN_NAME" => name.titleize,
178
+ "PLUGIN_URL" => url,
179
+ "PLUGIN_MATCHER" => matcher
168
180
  }
169
181
 
170
182
  File.open(template_path, 'r') do |tmpl|
@@ -195,7 +207,10 @@ module Crawler
195
207
 
196
208
  if(@options[:generate_plugin])
197
209
  name = @options[:generate_plugin]
198
- self.generate_plugin(name)
210
+ url = ARGV.length > 0 ? ARGV[0] : nil
211
+ matcher = ARGV.length > 1 ? ARGV[1] : nil
212
+
213
+ self.generate_plugin(name, url, matcher)
199
214
  exit
200
215
  end
201
216
 
@@ -1,9 +1,10 @@
1
+ require 'iconv'
1
2
 
2
3
  module Apollo
3
4
  module Crawler
4
5
  module Plugins
5
6
  # PARAMETERIZE: Plugin class name
6
- class PLUGIN_NAME < Plugin
7
+ class PLUGIN_CLASS_NAME < Plugin
7
8
  @@URL = "PLUGIN_URL"
8
9
 
9
10
  @@MATCHER_ITEM = "PLUGIN_MATCHER"
@@ -13,9 +14,11 @@ module Apollo
13
14
  end
14
15
 
15
16
  def run()
16
- # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
17
- doc = Nokogiri::HTML(open(@@URL))
17
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
18
18
 
19
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
20
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
21
+
19
22
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
20
23
  {
21
24
  :text => i.text,
@@ -1,3 +1,5 @@
1
+ require 'iconv'
2
+
1
3
  require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
2
4
 
3
5
  module Apollo
@@ -14,9 +16,10 @@ module Apollo
14
16
  end
15
17
 
16
18
  def run()
17
- # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
18
- doc = Nokogiri::HTML(open(@@URL))
19
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
19
20
 
21
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
22
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
20
23
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
21
24
  {
22
25
  :text => i.text,
@@ -1,3 +1,5 @@
1
+ require 'iconv'
2
+
1
3
  require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
2
4
 
3
5
  module Apollo
@@ -14,7 +16,10 @@ module Apollo
14
16
  end
15
17
 
16
18
  def run()
17
- doc = Nokogiri::HTML(open(@@URL))
19
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
20
+
21
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
22
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
18
23
 
19
24
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
20
25
  {
@@ -1,3 +1,5 @@
1
+ require 'iconv'
2
+
1
3
  require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
2
4
 
3
5
  module Apollo
@@ -14,7 +16,10 @@ module Apollo
14
16
  end
15
17
 
16
18
  def run()
17
- doc = Nokogiri::HTML(open(@@URL))
19
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
20
+
21
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
22
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
18
23
 
19
24
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
20
25
  {
@@ -1,3 +1,5 @@
1
+ require 'iconv'
2
+
1
3
  require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
2
4
 
3
5
  module Apollo
@@ -14,7 +16,10 @@ module Apollo
14
16
  end
15
17
 
16
18
  def run()
17
- doc = Nokogiri::HTML(open(@@URL))
19
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
20
+
21
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
22
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
18
23
 
19
24
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
20
25
  {
@@ -1,5 +1,5 @@
1
1
  module Apollo
2
2
  module Crawler
3
- VERSION = '0.0.26'
3
+ VERSION = '0.0.28'
4
4
  end # Crawler
5
5
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.26
4
+ version: 0.0.28
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: