reliefweb_scraper 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -10
- data/Rakefile +1 -1
- data/lib/reliefweb.rb +16 -24
- data/reliefweb_scraper.gemspec +5 -5
- data/spec/fixtures/vcr_cassettes/reliefweb.yml +1084 -693
- data/spec/lib/reliefweb_spec.rb +7 -15
- metadata +97 -114
- data/lib/glidenumber.rb +0 -58
- data/spec/fixtures/vcr_cassettes/glidenumber.yml +0 -869
- data/spec/lib/glidenumber_spec.rb +0 -21
data/spec/lib/reliefweb_spec.rb
CHANGED
@@ -5,26 +5,18 @@ describe Reliefweb do
|
|
5
5
|
use_vcr_cassette 'reliefweb', :record => :new_episodes
|
6
6
|
|
7
7
|
it 'should fetch a list of featured disasters' do
|
8
|
-
Reliefweb.should_receive(:fetch_glidenumber).at_least(:once).with(/^\//).and_return("OT-2011-000110-UGA")
|
9
|
-
Glidenumber.should_receive(:find).at_least(:once).with("OT-2011-000110-UGA").and_return(
|
10
|
-
Glidenumber::Record.new(:country => "Uganda",
|
11
|
-
:date => Date.new(2011, 8, 12),
|
12
|
-
:glidenumber => "MS-2011-000110-UGA"))
|
13
|
-
|
14
8
|
disasters = Reliefweb.featured_disasters
|
15
9
|
disasters.should_not be_empty
|
16
10
|
disaster = disasters.first
|
17
11
|
disaster.should be_a(Reliefweb::Disaster)
|
18
|
-
disaster.url.should == "http://reliefweb.int/
|
19
|
-
disaster.
|
20
|
-
disaster.date.should ==
|
21
|
-
disaster.glidenumber.should == '
|
22
|
-
disaster.title.should == '
|
12
|
+
disaster.url.should == "http://reliefweb.int/disaster/tc-2012-000036-mdg"
|
13
|
+
disaster.country_code.should == "mdg, moz"
|
14
|
+
disaster.date.should == Time.parse('Mon, 05 Mar 2012 00:00:00 +0000')
|
15
|
+
disaster.glidenumber.should == 'TC-2012-000036-MDG'
|
16
|
+
disaster.title.should == 'Tropical Storm Irina'
|
17
|
+
disaster.current.should be_true
|
18
|
+
disaster.disaster_type.should == "Tropical Cyclone, Flood, Violent Wind"
|
23
19
|
end
|
24
20
|
|
25
|
-
it 'should parse a glide number from a disaster page' do
|
26
|
-
glidenumber = Reliefweb.fetch_glidenumber("http://reliefweb.int/taxonomy/term/8956")
|
27
|
-
glidenumber.should == "MS-2011-000110-UGA"
|
28
|
-
end
|
29
21
|
end
|
30
22
|
|
metadata
CHANGED
@@ -1,177 +1,160 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: reliefweb_scraper
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 3
|
9
|
-
- 1
|
10
|
-
version: 0.3.1
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Nathan Broadbent
|
14
9
|
- Stanley Lau
|
15
10
|
- Stephen Kenworthy
|
16
11
|
autorequire:
|
17
12
|
bindir: bin
|
18
13
|
cert_chain: []
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
name: mechanize
|
25
|
-
prerelease: false
|
26
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
14
|
+
date: 2012-03-24 00:00:00.000000000Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: nokogiri
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
27
19
|
none: false
|
28
|
-
requirements:
|
29
|
-
- -
|
30
|
-
- !ruby/object:Gem::Version
|
31
|
-
|
32
|
-
segments:
|
33
|
-
- 2
|
34
|
-
- 0
|
35
|
-
- 1
|
36
|
-
version: 2.0.1
|
20
|
+
requirements:
|
21
|
+
- - ! '>='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.5.0
|
37
24
|
type: :runtime
|
38
|
-
version_requirements: *id001
|
39
|
-
- !ruby/object:Gem::Dependency
|
40
|
-
name: hashie
|
41
25
|
prerelease: false
|
42
|
-
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - ! '>='
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 1.5.0
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: hashie
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
43
35
|
none: false
|
44
|
-
requirements:
|
45
|
-
- -
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
hash: 19
|
48
|
-
segments:
|
49
|
-
- 1
|
50
|
-
- 1
|
51
|
-
- 0
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
52
39
|
version: 1.1.0
|
53
40
|
type: :runtime
|
54
|
-
version_requirements: *id002
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rspec
|
57
41
|
prerelease: false
|
58
|
-
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.1.0
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: rspec
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
59
51
|
none: false
|
60
|
-
requirements:
|
52
|
+
requirements:
|
61
53
|
- - ~>
|
62
|
-
- !ruby/object:Gem::Version
|
63
|
-
hash: 27
|
64
|
-
segments:
|
65
|
-
- 2
|
66
|
-
- 5
|
67
|
-
- 0
|
54
|
+
- !ruby/object:Gem::Version
|
68
55
|
version: 2.5.0
|
69
56
|
type: :development
|
70
|
-
version_requirements: *id003
|
71
|
-
- !ruby/object:Gem::Dependency
|
72
|
-
name: vcr
|
73
57
|
prerelease: false
|
74
|
-
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
59
|
none: false
|
76
|
-
requirements:
|
60
|
+
requirements:
|
77
61
|
- - ~>
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 2.5.0
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
name: vcr
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ~>
|
70
|
+
- !ruby/object:Gem::Version
|
84
71
|
version: 1.11.1
|
85
72
|
type: :development
|
86
|
-
version_requirements: *id004
|
87
|
-
- !ruby/object:Gem::Dependency
|
88
|
-
name: webmock
|
89
73
|
prerelease: false
|
90
|
-
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
91
75
|
none: false
|
92
|
-
requirements:
|
76
|
+
requirements:
|
93
77
|
- - ~>
|
94
|
-
- !ruby/object:Gem::Version
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.11.1
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: webmock
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
100
87
|
version: 1.7.0
|
101
88
|
type: :development
|
102
|
-
version_requirements: *id005
|
103
|
-
- !ruby/object:Gem::Dependency
|
104
|
-
name: awesome_print
|
105
89
|
prerelease: false
|
106
|
-
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
91
|
none: false
|
108
|
-
requirements:
|
92
|
+
requirements:
|
109
93
|
- - ~>
|
110
|
-
- !ruby/object:Gem::Version
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: 1.7.0
|
96
|
+
- !ruby/object:Gem::Dependency
|
97
|
+
name: awesome_print
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
116
103
|
version: 0.4.0
|
117
104
|
type: :development
|
118
|
-
|
119
|
-
|
120
|
-
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ~>
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: 0.4.0
|
112
|
+
description: Gathers disasters from Reliefweb via RSS and presents them as a Disaster
|
113
|
+
hashie
|
114
|
+
email:
|
121
115
|
- itdept@crossroads.org.hk
|
122
116
|
executables: []
|
123
|
-
|
124
117
|
extensions: []
|
125
|
-
|
126
118
|
extra_rdoc_files: []
|
127
|
-
|
128
|
-
files:
|
119
|
+
files:
|
129
120
|
- .gitignore
|
130
121
|
- .rspec
|
131
122
|
- Gemfile
|
132
123
|
- README.md
|
133
124
|
- Rakefile
|
134
|
-
- lib/glidenumber.rb
|
135
125
|
- lib/reliefweb.rb
|
136
126
|
- reliefweb_scraper.gemspec
|
137
|
-
- spec/fixtures/vcr_cassettes/glidenumber.yml
|
138
127
|
- spec/fixtures/vcr_cassettes/reliefweb.yml
|
139
|
-
- spec/lib/glidenumber_spec.rb
|
140
128
|
- spec/lib/reliefweb_spec.rb
|
141
129
|
- spec/spec_helper.rb
|
142
|
-
has_rdoc: true
|
143
130
|
homepage: http://www.crossroads.org.hk
|
144
131
|
licenses: []
|
145
|
-
|
146
132
|
post_install_message:
|
147
133
|
rdoc_options: []
|
148
|
-
|
149
|
-
require_paths:
|
134
|
+
require_paths:
|
150
135
|
- lib
|
151
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
136
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
152
137
|
none: false
|
153
|
-
requirements:
|
154
|
-
- -
|
155
|
-
- !ruby/object:Gem::Version
|
156
|
-
|
157
|
-
segments:
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
segments:
|
158
143
|
- 0
|
159
|
-
|
160
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
144
|
+
hash: -475121041
|
145
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
146
|
none: false
|
162
|
-
requirements:
|
163
|
-
- -
|
164
|
-
- !ruby/object:Gem::Version
|
165
|
-
|
166
|
-
segments:
|
147
|
+
requirements:
|
148
|
+
- - ! '>='
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
segments:
|
167
152
|
- 0
|
168
|
-
|
153
|
+
hash: -475121041
|
169
154
|
requirements: []
|
170
|
-
|
171
155
|
rubyforge_project:
|
172
|
-
rubygems_version: 1.
|
156
|
+
rubygems_version: 1.8.20
|
173
157
|
signing_key:
|
174
158
|
specification_version: 3
|
175
|
-
summary:
|
159
|
+
summary: Feeds disasters from Reliefweb
|
176
160
|
test_files: []
|
177
|
-
|
data/lib/glidenumber.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
require 'date'
|
2
|
-
require 'hashie'
|
3
|
-
require 'mechanize'
|
4
|
-
|
5
|
-
module Glidenumber
|
6
|
-
class SearchError < StandardError; end
|
7
|
-
class ParseError < StandardError; end
|
8
|
-
|
9
|
-
class Record < Hashie::Mash; end
|
10
|
-
|
11
|
-
def self.find(glidenumber)
|
12
|
-
unless query = glidenumber.upcase[/([0-9]{4}-[0-9]{6}-[A-Z]{3})/,1]
|
13
|
-
raise ArgumentError.new("Glidenumber is invalid. Should contain: 1234-123456-ABC")
|
14
|
-
end
|
15
|
-
agent = Mechanize.new
|
16
|
-
agent.post("http://glidenumber.net/glide/public/search/search.jsp",
|
17
|
-
{'X_Resolution' => "1280",
|
18
|
-
'events' => "*",
|
19
|
-
'ftoption' => "&",
|
20
|
-
'go.x' => "0",
|
21
|
-
'go.y' => "0",
|
22
|
-
'keywords' => query,
|
23
|
-
'level0' => "*",
|
24
|
-
'level1' => "*",
|
25
|
-
'maxhits' => "10",
|
26
|
-
'nStart' => "0",
|
27
|
-
'posted' => "0",
|
28
|
-
'process' => "0",
|
29
|
-
'sortby' => "0"}
|
30
|
-
)
|
31
|
-
links = agent.page.parser.css("table tr td.bgLightLight table tr td table tr.bgLightLight[2] a")
|
32
|
-
if links.size != 1
|
33
|
-
if links.empty?
|
34
|
-
raise SearchError.new("Did not find any records matching: #{query}")
|
35
|
-
else
|
36
|
-
raise SearchError.new("Found too many records matching: #{query}")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
link = links.first
|
40
|
-
glidenumber = link.text.strip
|
41
|
-
glidenumber.match /([A-Z]{2})-\d{4}-\d{6}-([A-Z]{3})/
|
42
|
-
disaster_type, country_code = $1, $2
|
43
|
-
|
44
|
-
agent.get(link.attributes["href"].to_s)
|
45
|
-
|
46
|
-
country = agent.page.parser.css("table tr td table.basefontSmall tr[3] td.bgLightLight strong").text.strip.split.last
|
47
|
-
raise ParseError.new("Could not find country on Glidenumber page!") if country == ""
|
48
|
-
date = agent.page.parser.css("table tr td table.basefontSmall tr[5] td.bgLightLight[2]").text.strip
|
49
|
-
date = Date.strptime(date, '%Y-%m-%d')
|
50
|
-
|
51
|
-
Record.new({:country => country,
|
52
|
-
:country_code => country_code,
|
53
|
-
:date => date,
|
54
|
-
:glidenumber => glidenumber,
|
55
|
-
:disaster_type => disaster_type})
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|