apollo-crawler 0.0.26 → 0.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/apollo-crawler CHANGED
@@ -1,5 +1,7 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
+ # encoding: utf-8
4
+
3
5
  require "rubygems"
4
6
  require "bundler/setup"
5
7
 
@@ -13,6 +15,10 @@ require "nokogiri"
13
15
  require "pp"
14
16
  require "optparse"
15
17
 
18
+ require 'active_support'
19
+ require 'active_support/inflector'
20
+
21
+
16
22
  require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
17
23
 
18
24
  module Crawler
@@ -141,7 +147,9 @@ module Crawler
141
147
  end
142
148
  end
143
149
 
144
- def generate_plugin(name)
150
+ def generate_plugin(name, url = nil, matcher = nil)
151
+ name = name.titleize.gsub(" ", "")
152
+
145
153
  if(@options[:verbose])
146
154
  puts "Generating new plugin '#{name}'"
147
155
  end
@@ -156,15 +164,19 @@ module Crawler
156
164
  puts "Using template '#{template_path}'"
157
165
  end
158
166
 
159
- dest_path = File.join(Dir.pwd, "#{name}.rb")
167
+ dest_path = File.join(Dir.pwd, "#{name.underscore}.rb")
160
168
  if(@options[:verbose])
161
169
  puts "Generating '#{dest_path}'"
162
170
  end
163
171
 
172
+ url = url ? url : "http://some-url-here"
173
+ matcher = matcher ? matcher : "//a"
174
+
164
175
  placeholders = {
165
- "PLUGIN_NAME" => name,
166
- "PLUGIN_URL" => "http://some-url-here",
167
- "PLUGIN_MATCHER" => "//a"
176
+ "PLUGIN_CLASS_NAME" => name,
177
+ "PLUGIN_NAME" => name.titleize,
178
+ "PLUGIN_URL" => url,
179
+ "PLUGIN_MATCHER" => matcher
168
180
  }
169
181
 
170
182
  File.open(template_path, 'r') do |tmpl|
@@ -195,7 +207,10 @@ module Crawler
195
207
 
196
208
  if(@options[:generate_plugin])
197
209
  name = @options[:generate_plugin]
198
- self.generate_plugin(name)
210
+ url = ARGV.length > 0 ? ARGV[0] : nil
211
+ matcher = ARGV.length > 1 ? ARGV[1] : nil
212
+
213
+ self.generate_plugin(name, url, matcher)
199
214
  exit
200
215
  end
201
216
 
@@ -1,9 +1,10 @@
1
+ require 'iconv'
1
2
 
2
3
  module Apollo
3
4
  module Crawler
4
5
  module Plugins
5
6
  # PARAMETERIZE: Plugin class name
6
- class PLUGIN_NAME < Plugin
7
+ class PLUGIN_CLASS_NAME < Plugin
7
8
  @@URL = "PLUGIN_URL"
8
9
 
9
10
  @@MATCHER_ITEM = "PLUGIN_MATCHER"
@@ -13,9 +14,11 @@ module Apollo
13
14
  end
14
15
 
15
16
  def run()
16
- # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
17
- doc = Nokogiri::HTML(open(@@URL))
17
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
18
18
 
19
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
20
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
21
+
19
22
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
20
23
  {
21
24
  :text => i.text,
@@ -1,3 +1,5 @@
1
+ require 'iconv'
2
+
1
3
  require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
2
4
 
3
5
  module Apollo
@@ -14,9 +16,10 @@ module Apollo
14
16
  end
15
17
 
16
18
  def run()
17
- # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
18
- doc = Nokogiri::HTML(open(@@URL))
19
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
19
20
 
21
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
22
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
20
23
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
21
24
  {
22
25
  :text => i.text,
@@ -1,3 +1,5 @@
1
+ require 'iconv'
2
+
1
3
  require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
2
4
 
3
5
  module Apollo
@@ -14,7 +16,10 @@ module Apollo
14
16
  end
15
17
 
16
18
  def run()
17
- doc = Nokogiri::HTML(open(@@URL))
19
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
20
+
21
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
22
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
18
23
 
19
24
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
20
25
  {
@@ -1,3 +1,5 @@
1
+ require 'iconv'
2
+
1
3
  require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
2
4
 
3
5
  module Apollo
@@ -14,7 +16,10 @@ module Apollo
14
16
  end
15
17
 
16
18
  def run()
17
- doc = Nokogiri::HTML(open(@@URL))
19
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
20
+
21
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
22
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
18
23
 
19
24
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
20
25
  {
@@ -1,3 +1,5 @@
1
+ require 'iconv'
2
+
1
3
  require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
2
4
 
3
5
  module Apollo
@@ -14,7 +16,10 @@ module Apollo
14
16
  end
15
17
 
16
18
  def run()
17
- doc = Nokogiri::HTML(open(@@URL))
19
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
20
+
21
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
22
+ doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
18
23
 
19
24
  res = doc.xpath(@@MATCHER_ITEM).map { |i|
20
25
  {
@@ -1,5 +1,5 @@
1
1
  module Apollo
2
2
  module Crawler
3
- VERSION = '0.0.26'
3
+ VERSION = '0.0.28'
4
4
  end # Crawler
5
5
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.26
4
+ version: 0.0.28
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: