reliefweb_scraper 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -10
- data/Rakefile +1 -1
- data/lib/reliefweb.rb +16 -24
- data/reliefweb_scraper.gemspec +5 -5
- data/spec/fixtures/vcr_cassettes/reliefweb.yml +1084 -693
- data/spec/lib/reliefweb_spec.rb +7 -15
- metadata +97 -114
- data/lib/glidenumber.rb +0 -58
- data/spec/fixtures/vcr_cassettes/glidenumber.yml +0 -869
- data/spec/lib/glidenumber_spec.rb +0 -21
data/README.md
CHANGED
@@ -1,18 +1,16 @@
|
|
1
1
|
Reliefweb Scraper
|
2
2
|
=================
|
3
3
|
|
4
|
-
*
|
5
|
-
* Retrieves extra information by searching http://glidenumber.net
|
4
|
+
* Fetches featured disasters from the RSS feed at http://reliefweb.int/
|
6
5
|
|
7
|
-
|
8
|
-
so you can work with them in the following ways:
|
6
|
+
Reliefweb::Disaster is a subclass of Hashie::Mash, so you can do the following:
|
9
7
|
|
10
8
|
disaster = Reliefweb.featured_disasters.first
|
11
9
|
puts disaster["title"]
|
12
10
|
puts disaster.title
|
13
11
|
|
14
12
|
|
15
|
-
* Run "rake featured_disasters:fetch" to see the disaster data
|
13
|
+
* Run "rake featured_disasters:fetch" to see the disaster data
|
16
14
|
* Run "rake spec" to run the rspec tests
|
17
15
|
** Note: run "rm spec/fixtures/vcr_cassettes/*.yml" if you want to re-record the vcr specs
|
18
16
|
|
@@ -21,11 +19,7 @@ Examples
|
|
21
19
|
|
22
20
|
Return an array of featured disasters:
|
23
21
|
|
24
|
-
Reliefweb.featured_disasters
|
25
|
-
|
26
|
-
Fetch disaster information from http://glidenumber.net :
|
27
|
-
|
28
|
-
Glidenumber.find("OT-2011-000110-UGA")
|
22
|
+
Reliefweb.featured_disasters
|
29
23
|
|
30
24
|
|
31
25
|
History
|
@@ -33,3 +27,4 @@ History
|
|
33
27
|
|
34
28
|
* version 0.2.0 - initial release
|
35
29
|
* version 0.3.1 - updated to reflect new changes to the ReliefWeb website
|
30
|
+
* version 0.4.0 - now uses the new RSS feed from ReliefWeb; GlideNumber is no longer needed
|
data/Rakefile
CHANGED
data/lib/reliefweb.rb
CHANGED
@@ -1,35 +1,27 @@
|
|
1
|
-
require 'date'
|
2
1
|
require 'hashie'
|
3
|
-
require '
|
4
|
-
require
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open-uri'
|
5
4
|
|
6
5
|
module Reliefweb
|
7
|
-
class ParseError < StandardError; end
|
8
6
|
|
9
|
-
|
10
|
-
@agent ||= Mechanize.new
|
11
|
-
end
|
7
|
+
class Disaster < Hashie::Mash; end
|
12
8
|
|
13
9
|
def self.featured_disasters(options = {})
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
Disaster.new({:title => title, :url =>
|
10
|
+
url = 'http://reliefweb.int/disasters/rss.xml?sl=environment-disaster_listing'
|
11
|
+
xml = Nokogiri::XML(open(url))
|
12
|
+
xml.xpath('//channel/item').map do |item|
|
13
|
+
title = item.xpath('title').text.sub(/-[^-]*$/, '').strip
|
14
|
+
url = item.xpath('link').text
|
15
|
+
country_code = item.xpath('reliefweb:iso3').map(&:text).join(', ')
|
16
|
+
date = Time.parse(item.xpath('pubDate').text)
|
17
|
+
glidenumber = item.xpath('reliefweb:glide').text
|
18
|
+
disaster_type = item.xpath('reliefweb:disaster_type').map(&:text).join(', ')
|
19
|
+
current = true
|
20
|
+
Disaster.new({:title => title, :url => url, :country_code => country_code,
|
21
|
+
:date => date, :glidenumber => glidenumber, :disaster_type => disaster_type,
|
22
|
+
:current => current})
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
28
|
-
def self.fetch_glidenumber(link)
|
29
|
-
agent.get(link)
|
30
|
-
agent.page.parser.css("div.views-field-entity-id-3 div.field-content").text.strip
|
31
|
-
end
|
32
|
-
|
33
|
-
class Disaster < Hashie::Mash; end
|
34
26
|
end
|
35
27
|
|
data/reliefweb_scraper.gemspec
CHANGED
@@ -3,21 +3,21 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "reliefweb_scraper"
|
6
|
-
s.version = "0.3.1"
|
6
|
+
s.version = "0.4.0"
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ["Nathan Broadbent", "Stanley Lau", "Stephen Kenworthy"]
|
9
9
|
s.email = ["itdept@crossroads.org.hk"]
|
10
10
|
s.homepage = "http://www.crossroads.org.hk"
|
11
|
-
s.summary = %q{
|
12
|
-
s.description = %q{
|
11
|
+
s.summary = %q{Feeds disasters from Reliefweb}
|
12
|
+
s.description = %q{Gathers disasters from Reliefweb via RSS and presents them as a Disaster hashie}
|
13
13
|
|
14
14
|
s.files = `git ls-files`.split("\n")
|
15
15
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
16
16
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
17
17
|
s.require_paths = ["lib"]
|
18
18
|
|
19
|
-
s.add_dependency('
|
20
|
-
s.add_dependency('hashie',
|
19
|
+
s.add_dependency('nokogiri', ">= 1.5.0")
|
20
|
+
s.add_dependency('hashie', ">= 1.1.0")
|
21
21
|
|
22
22
|
s.add_development_dependency("rspec", "~> 2.5.0")
|
23
23
|
s.add_development_dependency("vcr", "~> 1.11.1")
|