reliefweb_scraper 0.3.1 → 0.4.0
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
- data/README.md +5 -10
- data/Rakefile +1 -1
- data/lib/reliefweb.rb +16 -24
- data/reliefweb_scraper.gemspec +5 -5
- data/spec/fixtures/vcr_cassettes/reliefweb.yml +1084 -693
- data/spec/lib/reliefweb_spec.rb +7 -15
- metadata +97 -114
- data/lib/glidenumber.rb +0 -58
- data/spec/fixtures/vcr_cassettes/glidenumber.yml +0 -869
- data/spec/lib/glidenumber_spec.rb +0 -21
data/README.md
CHANGED
@@ -1,18 +1,16 @@
|
|
1
1
|
Reliefweb Scraper
|
2
2
|
=================
|
3
3
|
|
4
|
-
*
|
5
|
-
* Retrieves extra information by searching http://glidenumber.net
|
4
|
+
* Feeds featured disasters from http://reliefweb.int/
|
6
5
|
|
7
|
-
|
8
|
-
so you can work with them in the following ways:
|
6
|
+
Reliefweb::Disaster is a subclass of Hashie::Mash, so you can do the following:
|
9
7
|
|
10
8
|
disaster = Reliefweb.featured_disasters.first
|
11
9
|
puts disaster["title"]
|
12
10
|
puts disaster.title
|
13
11
|
|
14
12
|
|
15
|
-
* Run "rake featured_disasters:fetch" to see the disaster data
|
13
|
+
* Run "rake featured_disasters:fetch" to see the disaster data
|
16
14
|
* Run "rake spec" to run the rspec tests
|
17
15
|
** Note: run "rm spec/fixtures/vcr_cassettes/*.yml" if you want to re-record the vcr specs
|
18
16
|
|
@@ -21,11 +19,7 @@ Examples
|
|
21
19
|
|
22
20
|
Return an array of featured disasters:
|
23
21
|
|
24
|
-
Reliefweb.featured_disasters
|
25
|
-
|
26
|
-
Fetch disaster information from http://glidenumber.net :
|
27
|
-
|
28
|
-
Glidenumber.find("OT-2011-000110-UGA")
|
22
|
+
Reliefweb.featured_disasters
|
29
23
|
|
30
24
|
|
31
25
|
History
|
@@ -33,3 +27,4 @@ History
|
|
33
27
|
|
34
28
|
* version 0.2.0 - initial release
|
35
29
|
* version 0.3.1 - updated to reflect new changes to the ReliefWeb website
|
30
|
+
* version 0.4.0 - using new RSS feed from ReliefWeb, no need for GlideNumber anymore
|
data/Rakefile
CHANGED
data/lib/reliefweb.rb
CHANGED
@@ -1,35 +1,27 @@
|
|
1
|
-
require 'date'
|
2
1
|
require 'hashie'
|
3
|
-
require '
|
4
|
-
require
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open-uri'
|
5
4
|
|
6
5
|
module Reliefweb
|
7
|
-
class ParseError < StandardError; end
|
8
6
|
|
9
|
-
|
10
|
-
@agent ||= Mechanize.new
|
11
|
-
end
|
7
|
+
class Disaster < Hashie::Mash; end
|
12
8
|
|
13
9
|
def self.featured_disasters(options = {})
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
Disaster.new({:title => title, :url =>
|
10
|
+
url = 'http://reliefweb.int/disasters/rss.xml?sl=environment-disaster_listing'
|
11
|
+
xml = Nokogiri::XML(open(url))
|
12
|
+
xml.xpath('//channel/item').map do |item|
|
13
|
+
title = item.xpath('title').text.sub(/-[^-]*$/, '').strip
|
14
|
+
url = item.xpath('link').text
|
15
|
+
country_code = item.xpath('reliefweb:iso3').map(&:text).join(', ')
|
16
|
+
date = Time.parse(item.xpath('pubDate').text)
|
17
|
+
glidenumber = item.xpath('reliefweb:glide').text
|
18
|
+
disaster_type = item.xpath('reliefweb:disaster_type').map(&:text).join(', ')
|
19
|
+
current = true
|
20
|
+
Disaster.new({:title => title, :url => url, :country_code => country_code,
|
21
|
+
:date => date, :glidenumber => glidenumber, :disaster_type => disaster_type,
|
22
|
+
:current => current})
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
28
|
-
def self.fetch_glidenumber(link)
|
29
|
-
agent.get(link)
|
30
|
-
agent.page.parser.css("div.views-field-entity-id-3 div.field-content").text.strip
|
31
|
-
end
|
32
|
-
|
33
|
-
class Disaster < Hashie::Mash; end
|
34
26
|
end
|
35
27
|
|
data/reliefweb_scraper.gemspec
CHANGED
@@ -3,21 +3,21 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "reliefweb_scraper"
|
6
|
-
s.version = "0.
|
6
|
+
s.version = "0.4.0"
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ["Nathan Broadbent", "Stanley Lau", "Stephen Kenworthy"]
|
9
9
|
s.email = ["itdept@crossroads.org.hk"]
|
10
10
|
s.homepage = "http://www.crossroads.org.hk"
|
11
|
-
s.summary = %q{
|
12
|
-
s.description = %q{
|
11
|
+
s.summary = %q{Feeds disasters from Reliefweb}
|
12
|
+
s.description = %q{Gathers disasters from Reliefweb via RSS and presents them as a Disaster hashie}
|
13
13
|
|
14
14
|
s.files = `git ls-files`.split("\n")
|
15
15
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
16
16
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
17
17
|
s.require_paths = ["lib"]
|
18
18
|
|
19
|
-
s.add_dependency('
|
20
|
-
s.add_dependency('hashie',
|
19
|
+
s.add_dependency('nokogiri', ">= 1.5.0")
|
20
|
+
s.add_dependency('hashie', ">= 1.1.0")
|
21
21
|
|
22
22
|
s.add_development_dependency("rspec", "~> 2.5.0")
|
23
23
|
s.add_development_dependency("vcr", "~> 1.11.1")
|