apollo-crawler 0.0.47 → 0.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@ module Apollo
25
25
  # - (3) Go to (0) eventually
26
26
  def etl(url=nil)
27
27
  # Use the passed URL, falling back to the default when none is given; fail if it is not valid
28
- if(url.empty?)
28
+ if(url.nil? || url.empty?)
29
29
  url = self.url
30
30
  end
31
31
 
@@ -81,7 +81,7 @@ module Apollo
81
81
  return nil
82
82
  end
83
83
 
84
- raw = open(self.url).read
84
+ raw = open(url).read
85
85
 
86
86
  # TODO: Encapsulate and make more robust => handle invalid hostnames, timeouts and so on
87
87
  doc = Nokogiri::HTML(raw)
@@ -0,0 +1,26 @@
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class Google < Crawler
6
+ @@MATCHER_ITEM = "//h3/a"
7
+
8
+ def name()
9
+ return "Google"
10
+ end
11
+
12
+ def url()
13
+ return "http://www.google.com/search?q=ruby"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
+ {
19
+ :text => i.text,
20
+ :link => URI.join(self.url, i['href'])
21
+ }
22
+ }
23
+ end
24
+ end
25
+ end # Crawlers
26
+ end # Apollo
@@ -1,5 +1,5 @@
1
1
  module Apollo
2
2
  module Crawler
3
- VERSION = '0.0.47'
3
+ VERSION = '0.0.48'
4
4
  end # Crawler
5
5
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.47
4
+ version: 0.0.48
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -234,6 +234,7 @@ files:
234
234
  - ./lib/apollo_crawler/crawler.rb
235
235
  - ./lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb
236
236
  - ./lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb
237
+ - ./lib/apollo_crawler/crawlers/google_com/google.rb
237
238
  - ./lib/apollo_crawler/crawlers/slashdot_org/slashdot.rb
238
239
  - ./lib/apollo_crawler/crawlers/firmy_cz/firmy.rb
239
240
  - ./lib/apollo_crawler/crawlers/alexa_com/alexa.rb
@@ -244,7 +245,6 @@ files:
244
245
  homepage: https://github.com/korczis/apollo-crawler
245
246
  licenses:
246
247
  - MIT
247
- - GPL-2
248
248
  post_install_message: Thanks for installing Apollo Crawler!
249
249
  rdoc_options: []
250
250
  require_paths: