apollo-crawler 0.0.47 → 0.0.48

Sign up to get free protection for your applications and to get access to all the features.
@@ -25,7 +25,7 @@ module Apollo
25
25
  # - (3) Go to (0) eventually
26
26
  def etl(url=nil)
27
27
  # Use the passed URL if given, otherwise fall back to the default; fail if it is not valid
28
- if(url.empty?)
28
+ if(url.nil? || url.empty?)
29
29
  url = self.url
30
30
  end
31
31
 
@@ -81,7 +81,7 @@ module Apollo
81
81
  return nil
82
82
  end
83
83
 
84
- raw = open(self.url).read
84
+ raw = open(url).read
85
85
 
86
86
  # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
87
87
  doc = Nokogiri::HTML(raw)
@@ -0,0 +1,26 @@
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class Google < Crawler
6
+ @@MATCHER_ITEM = "//h3/a"
7
+
8
+ def name()
9
+ return "Google"
10
+ end
11
+
12
+ def url()
13
+ return "http://www.google.com/search?q=ruby"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
+ {
19
+ :text => i.text,
20
+ :link => URI.join(self.url, i['href'])
21
+ }
22
+ }
23
+ end
24
+ end
25
+ end # Crawlers
26
+ end # Apollo
@@ -1,5 +1,5 @@
1
1
  module Apollo
2
2
  module Crawler
3
- VERSION = '0.0.47'
3
+ VERSION = '0.0.48'
4
4
  end # Crawler
5
5
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.47
4
+ version: 0.0.48
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -234,6 +234,7 @@ files:
234
234
  - ./lib/apollo_crawler/crawler.rb
235
235
  - ./lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb
236
236
  - ./lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb
237
+ - ./lib/apollo_crawler/crawlers/google_com/google.rb
237
238
  - ./lib/apollo_crawler/crawlers/slashdot_org/slashdot.rb
238
239
  - ./lib/apollo_crawler/crawlers/firmy_cz/firmy.rb
239
240
  - ./lib/apollo_crawler/crawlers/alexa_com/alexa.rb
@@ -244,7 +245,6 @@ files:
244
245
  homepage: https://github.com/korczis/apollo-crawler
245
246
  licenses:
246
247
  - MIT
247
- - GPL-2
248
248
  post_install_message: Thanks for installing Apollo Crawler!
249
249
  rdoc_options: []
250
250
  require_paths: