reliefweb_scraper 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/glidenumber.rb +8 -1
- data/lib/reliefweb.rb +1 -1
- data/reliefweb_scraper.gemspec +1 -1
- data/spec/lib/glidenumber_spec.rb +3 -1
- data/spec/lib/reliefweb_spec.rb +1 -0
- metadata +20 -14
data/lib/glidenumber.rb
CHANGED
|
@@ -38,6 +38,9 @@ module Glidenumber
|
|
|
38
38
|
end
|
|
39
39
|
link = links.first
|
|
40
40
|
glidenumber = link.text.strip
|
|
41
|
+
glidenumber.match /([A-Z]{2})-\d{4}-\d{6}-([A-Z]{3})/
|
|
42
|
+
disaster_type, country_code = $1, $2
|
|
43
|
+
|
|
41
44
|
agent.get(link.attributes["href"].to_s)
|
|
42
45
|
|
|
43
46
|
country = agent.page.parser.css("table tr td table.basefontSmall tr[3] td.bgLightLight strong").text.strip.split.last
|
|
@@ -45,7 +48,11 @@ module Glidenumber
|
|
|
45
48
|
date = agent.page.parser.css("table tr td table.basefontSmall tr[5] td.bgLightLight[2]").text.strip
|
|
46
49
|
date = Date.strptime(date, '%Y-%m-%d')
|
|
47
50
|
|
|
48
|
-
Record.new({:country => country,
|
|
51
|
+
Record.new({:country => country,
|
|
52
|
+
:country_code => country_code,
|
|
53
|
+
:date => date,
|
|
54
|
+
:glidenumber => glidenumber,
|
|
55
|
+
:disaster_type => disaster_type})
|
|
49
56
|
end
|
|
50
57
|
end
|
|
51
58
|
|
data/lib/reliefweb.rb
CHANGED
|
@@ -25,7 +25,7 @@ module Reliefweb
|
|
|
25
25
|
reliefweb_glide = fetch_glidenumber(href)
|
|
26
26
|
puts "== Fetching glide details for #{reliefweb_glide}..." if options[:verbose]
|
|
27
27
|
record = Glidenumber.find(reliefweb_glide)
|
|
28
|
-
Disaster.new({:title => title}.merge(record.to_hash))
|
|
28
|
+
Disaster.new({:title => title, :url => URI.join("http://reliefweb.int", href).to_s}.merge(record.to_hash))
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
|
data/reliefweb_scraper.gemspec
CHANGED
|
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "reliefweb_scraper"
|
|
6
|
-
s.version = "0.1.0"
|
|
6
|
+
s.version = "0.2.0"
|
|
7
7
|
s.platform = Gem::Platform::RUBY
|
|
8
8
|
s.authors = ["Nathan Broadbent", "Stanley Lau"]
|
|
9
9
|
s.email = ["it_dept@crossroads.org.hk"]
|
|
data/spec/lib/glidenumber_spec.rb
CHANGED
|
@@ -7,9 +7,11 @@ describe Glidenumber do
|
|
|
7
7
|
it 'should find the details of a disaster by glide number' do
|
|
8
8
|
record = Glidenumber.find('OT-2011-000110-UGA')
|
|
9
9
|
record.should be_a(Glidenumber::Record)
|
|
10
|
+
record.glidenumber.should == 'MS-2011-000110-UGA'
|
|
10
11
|
record.country.should == "Uganda"
|
|
12
|
+
record.country_code.should == 'UGA'
|
|
13
|
+
record.disaster_type.should == 'MS'
|
|
11
14
|
record.date.should == Date.strptime('2011-8-12', '%Y-%m-%d')
|
|
12
|
-
record.glidenumber.should == 'MS-2011-000110-UGA'
|
|
13
15
|
end
|
|
14
16
|
|
|
15
17
|
it 'should raise an error on invalid glide number' do
|
data/spec/lib/reliefweb_spec.rb
CHANGED
|
@@ -15,6 +15,7 @@ describe Reliefweb do
|
|
|
15
15
|
disasters.should_not be_empty
|
|
16
16
|
disaster = disasters.first
|
|
17
17
|
disaster.should be_a(Reliefweb::Disaster)
|
|
18
|
+
disaster.url.should == "http://reliefweb.int/horn-africa-crisis2011"
|
|
18
19
|
disaster.country.should == "Uganda"
|
|
19
20
|
disaster.date.should == Date.strptime('2011-8-12', '%Y-%m-%d')
|
|
20
21
|
disaster.glidenumber.should == 'MS-2011-000110-UGA'
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: reliefweb_scraper
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -10,11 +10,11 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date: 2011-08-
|
|
13
|
+
date: 2011-08-26 00:00:00.000000000Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: mechanize
|
|
17
|
-
requirement: &
|
|
17
|
+
requirement: &78537160 !ruby/object:Gem::Requirement
|
|
18
18
|
none: false
|
|
19
19
|
requirements:
|
|
20
20
|
- - ! '>='
|
|
@@ -22,10 +22,10 @@ dependencies:
|
|
|
22
22
|
version: 2.0.1
|
|
23
23
|
type: :runtime
|
|
24
24
|
prerelease: false
|
|
25
|
-
version_requirements: *
|
|
25
|
+
version_requirements: *78537160
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: hashie
|
|
28
|
-
requirement: &
|
|
28
|
+
requirement: &78536880 !ruby/object:Gem::Requirement
|
|
29
29
|
none: false
|
|
30
30
|
requirements:
|
|
31
31
|
- - ! '>='
|
|
@@ -33,10 +33,10 @@ dependencies:
|
|
|
33
33
|
version: 1.1.0
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
|
-
version_requirements: *
|
|
36
|
+
version_requirements: *78536880
|
|
37
37
|
- !ruby/object:Gem::Dependency
|
|
38
38
|
name: rspec
|
|
39
|
-
requirement: &
|
|
39
|
+
requirement: &78536610 !ruby/object:Gem::Requirement
|
|
40
40
|
none: false
|
|
41
41
|
requirements:
|
|
42
42
|
- - ~>
|
|
@@ -44,10 +44,10 @@ dependencies:
|
|
|
44
44
|
version: 2.5.0
|
|
45
45
|
type: :development
|
|
46
46
|
prerelease: false
|
|
47
|
-
version_requirements: *
|
|
47
|
+
version_requirements: *78536610
|
|
48
48
|
- !ruby/object:Gem::Dependency
|
|
49
49
|
name: vcr
|
|
50
|
-
requirement: &
|
|
50
|
+
requirement: &78536300 !ruby/object:Gem::Requirement
|
|
51
51
|
none: false
|
|
52
52
|
requirements:
|
|
53
53
|
- - ~>
|
|
@@ -55,10 +55,10 @@ dependencies:
|
|
|
55
55
|
version: 1.11.1
|
|
56
56
|
type: :development
|
|
57
57
|
prerelease: false
|
|
58
|
-
version_requirements: *
|
|
58
|
+
version_requirements: *78536300
|
|
59
59
|
- !ruby/object:Gem::Dependency
|
|
60
60
|
name: webmock
|
|
61
|
-
requirement: &
|
|
61
|
+
requirement: &78535740 !ruby/object:Gem::Requirement
|
|
62
62
|
none: false
|
|
63
63
|
requirements:
|
|
64
64
|
- - ~>
|
|
@@ -66,10 +66,10 @@ dependencies:
|
|
|
66
66
|
version: 1.7.0
|
|
67
67
|
type: :development
|
|
68
68
|
prerelease: false
|
|
69
|
-
version_requirements: *
|
|
69
|
+
version_requirements: *78535740
|
|
70
70
|
- !ruby/object:Gem::Dependency
|
|
71
71
|
name: awesome_print
|
|
72
|
-
requirement: &
|
|
72
|
+
requirement: &78535430 !ruby/object:Gem::Requirement
|
|
73
73
|
none: false
|
|
74
74
|
requirements:
|
|
75
75
|
- - ~>
|
|
@@ -77,7 +77,7 @@ dependencies:
|
|
|
77
77
|
version: 0.4.0
|
|
78
78
|
type: :development
|
|
79
79
|
prerelease: false
|
|
80
|
-
version_requirements: *
|
|
80
|
+
version_requirements: *78535430
|
|
81
81
|
description: Scrapes reliefweb's featured disasters, and pulls additional information
|
|
82
82
|
from glidenumber.net
|
|
83
83
|
email:
|
|
@@ -111,12 +111,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
111
111
|
- - ! '>='
|
|
112
112
|
- !ruby/object:Gem::Version
|
|
113
113
|
version: '0'
|
|
114
|
+
segments:
|
|
115
|
+
- 0
|
|
116
|
+
hash: -664477265
|
|
114
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
115
118
|
none: false
|
|
116
119
|
requirements:
|
|
117
120
|
- - ! '>='
|
|
118
121
|
- !ruby/object:Gem::Version
|
|
119
122
|
version: '0'
|
|
123
|
+
segments:
|
|
124
|
+
- 0
|
|
125
|
+
hash: -664477265
|
|
120
126
|
requirements: []
|
|
121
127
|
rubyforge_project:
|
|
122
128
|
rubygems_version: 1.8.6
|