reliefweb_scraper 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -10
- data/Rakefile +1 -1
- data/lib/reliefweb.rb +16 -24
- data/reliefweb_scraper.gemspec +5 -5
- data/spec/fixtures/vcr_cassettes/reliefweb.yml +1084 -693
- data/spec/lib/reliefweb_spec.rb +7 -15
- metadata +97 -114
- data/lib/glidenumber.rb +0 -58
- data/spec/fixtures/vcr_cassettes/glidenumber.yml +0 -869
- data/spec/lib/glidenumber_spec.rb +0 -21
data/spec/lib/reliefweb_spec.rb
CHANGED
@@ -5,26 +5,18 @@ describe Reliefweb do
|
|
5
5
|
use_vcr_cassette 'reliefweb', :record => :new_episodes
|
6
6
|
|
7
7
|
it 'should fetch a list of featured disasters' do
|
8
|
-
Reliefweb.should_receive(:fetch_glidenumber).at_least(:once).with(/^\//).and_return("OT-2011-000110-UGA")
|
9
|
-
Glidenumber.should_receive(:find).at_least(:once).with("OT-2011-000110-UGA").and_return(
|
10
|
-
Glidenumber::Record.new(:country => "Uganda",
|
11
|
-
:date => Date.new(2011, 8, 12),
|
12
|
-
:glidenumber => "MS-2011-000110-UGA"))
|
13
|
-
|
14
8
|
disasters = Reliefweb.featured_disasters
|
15
9
|
disasters.should_not be_empty
|
16
10
|
disaster = disasters.first
|
17
11
|
disaster.should be_a(Reliefweb::Disaster)
|
18
|
-
disaster.url.should == "http://reliefweb.int/
|
19
|
-
disaster.
|
20
|
-
disaster.date.should ==
|
21
|
-
disaster.glidenumber.should == '
|
22
|
-
disaster.title.should == '
|
12
|
+
disaster.url.should == "http://reliefweb.int/disaster/tc-2012-000036-mdg"
|
13
|
+
disaster.country_code.should == "mdg, moz"
|
14
|
+
disaster.date.should == Time.parse('Mon, 05 Mar 2012 00:00:00 +0000')
|
15
|
+
disaster.glidenumber.should == 'TC-2012-000036-MDG'
|
16
|
+
disaster.title.should == 'Tropical Storm Irina'
|
17
|
+
disaster.current.should be_true
|
18
|
+
disaster.disaster_type.should == "Tropical Cyclone, Flood, Violent Wind"
|
23
19
|
end
|
24
20
|
|
25
|
-
it 'should parse a glide number from a disaster page' do
|
26
|
-
glidenumber = Reliefweb.fetch_glidenumber("http://reliefweb.int/taxonomy/term/8956")
|
27
|
-
glidenumber.should == "MS-2011-000110-UGA"
|
28
|
-
end
|
29
21
|
end
|
30
22
|
|
metadata
CHANGED
@@ -1,177 +1,160 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: reliefweb_scraper
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 3
|
9
|
-
- 1
|
10
|
-
version: 0.3.1
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Nathan Broadbent
|
14
9
|
- Stanley Lau
|
15
10
|
- Stephen Kenworthy
|
16
11
|
autorequire:
|
17
12
|
bindir: bin
|
18
13
|
cert_chain: []
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
name: mechanize
|
25
|
-
prerelease: false
|
26
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
14
|
+
date: 2012-03-24 00:00:00.000000000Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: nokogiri
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
27
19
|
none: false
|
28
|
-
requirements:
|
29
|
-
- -
|
30
|
-
- !ruby/object:Gem::Version
|
31
|
-
|
32
|
-
segments:
|
33
|
-
- 2
|
34
|
-
- 0
|
35
|
-
- 1
|
36
|
-
version: 2.0.1
|
20
|
+
requirements:
|
21
|
+
- - ! '>='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.5.0
|
37
24
|
type: :runtime
|
38
|
-
version_requirements: *id001
|
39
|
-
- !ruby/object:Gem::Dependency
|
40
|
-
name: hashie
|
41
25
|
prerelease: false
|
42
|
-
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - ! '>='
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 1.5.0
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: hashie
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
43
35
|
none: false
|
44
|
-
requirements:
|
45
|
-
- -
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
hash: 19
|
48
|
-
segments:
|
49
|
-
- 1
|
50
|
-
- 1
|
51
|
-
- 0
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
52
39
|
version: 1.1.0
|
53
40
|
type: :runtime
|
54
|
-
version_requirements: *id002
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rspec
|
57
41
|
prerelease: false
|
58
|
-
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.1.0
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: rspec
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
59
51
|
none: false
|
60
|
-
requirements:
|
52
|
+
requirements:
|
61
53
|
- - ~>
|
62
|
-
- !ruby/object:Gem::Version
|
63
|
-
hash: 27
|
64
|
-
segments:
|
65
|
-
- 2
|
66
|
-
- 5
|
67
|
-
- 0
|
54
|
+
- !ruby/object:Gem::Version
|
68
55
|
version: 2.5.0
|
69
56
|
type: :development
|
70
|
-
version_requirements: *id003
|
71
|
-
- !ruby/object:Gem::Dependency
|
72
|
-
name: vcr
|
73
57
|
prerelease: false
|
74
|
-
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
59
|
none: false
|
76
|
-
requirements:
|
60
|
+
requirements:
|
77
61
|
- - ~>
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 2.5.0
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
name: vcr
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ~>
|
70
|
+
- !ruby/object:Gem::Version
|
84
71
|
version: 1.11.1
|
85
72
|
type: :development
|
86
|
-
version_requirements: *id004
|
87
|
-
- !ruby/object:Gem::Dependency
|
88
|
-
name: webmock
|
89
73
|
prerelease: false
|
90
|
-
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
91
75
|
none: false
|
92
|
-
requirements:
|
76
|
+
requirements:
|
93
77
|
- - ~>
|
94
|
-
- !ruby/object:Gem::Version
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.11.1
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: webmock
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
100
87
|
version: 1.7.0
|
101
88
|
type: :development
|
102
|
-
version_requirements: *id005
|
103
|
-
- !ruby/object:Gem::Dependency
|
104
|
-
name: awesome_print
|
105
89
|
prerelease: false
|
106
|
-
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
91
|
none: false
|
108
|
-
requirements:
|
92
|
+
requirements:
|
109
93
|
- - ~>
|
110
|
-
- !ruby/object:Gem::Version
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: 1.7.0
|
96
|
+
- !ruby/object:Gem::Dependency
|
97
|
+
name: awesome_print
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
116
103
|
version: 0.4.0
|
117
104
|
type: :development
|
118
|
-
|
119
|
-
|
120
|
-
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ~>
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: 0.4.0
|
112
|
+
description: Gathers disasters from Reliefweb via RSS and presents them as a Disaster
|
113
|
+
hashie
|
114
|
+
email:
|
121
115
|
- itdept@crossroads.org.hk
|
122
116
|
executables: []
|
123
|
-
|
124
117
|
extensions: []
|
125
|
-
|
126
118
|
extra_rdoc_files: []
|
127
|
-
|
128
|
-
files:
|
119
|
+
files:
|
129
120
|
- .gitignore
|
130
121
|
- .rspec
|
131
122
|
- Gemfile
|
132
123
|
- README.md
|
133
124
|
- Rakefile
|
134
|
-
- lib/glidenumber.rb
|
135
125
|
- lib/reliefweb.rb
|
136
126
|
- reliefweb_scraper.gemspec
|
137
|
-
- spec/fixtures/vcr_cassettes/glidenumber.yml
|
138
127
|
- spec/fixtures/vcr_cassettes/reliefweb.yml
|
139
|
-
- spec/lib/glidenumber_spec.rb
|
140
128
|
- spec/lib/reliefweb_spec.rb
|
141
129
|
- spec/spec_helper.rb
|
142
|
-
has_rdoc: true
|
143
130
|
homepage: http://www.crossroads.org.hk
|
144
131
|
licenses: []
|
145
|
-
|
146
132
|
post_install_message:
|
147
133
|
rdoc_options: []
|
148
|
-
|
149
|
-
require_paths:
|
134
|
+
require_paths:
|
150
135
|
- lib
|
151
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
136
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
152
137
|
none: false
|
153
|
-
requirements:
|
154
|
-
- -
|
155
|
-
- !ruby/object:Gem::Version
|
156
|
-
|
157
|
-
segments:
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
segments:
|
158
143
|
- 0
|
159
|
-
|
160
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
144
|
+
hash: -475121041
|
145
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
146
|
none: false
|
162
|
-
requirements:
|
163
|
-
- -
|
164
|
-
- !ruby/object:Gem::Version
|
165
|
-
|
166
|
-
segments:
|
147
|
+
requirements:
|
148
|
+
- - ! '>='
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
segments:
|
167
152
|
- 0
|
168
|
-
|
153
|
+
hash: -475121041
|
169
154
|
requirements: []
|
170
|
-
|
171
155
|
rubyforge_project:
|
172
|
-
rubygems_version: 1.
|
156
|
+
rubygems_version: 1.8.20
|
173
157
|
signing_key:
|
174
158
|
specification_version: 3
|
175
|
-
summary:
|
159
|
+
summary: Feeds disasters from Reliefweb
|
176
160
|
test_files: []
|
177
|
-
|
data/lib/glidenumber.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
require 'date'
|
2
|
-
require 'hashie'
|
3
|
-
require 'mechanize'
|
4
|
-
|
5
|
-
module Glidenumber
|
6
|
-
class SearchError < StandardError; end
|
7
|
-
class ParseError < StandardError; end
|
8
|
-
|
9
|
-
class Record < Hashie::Mash; end
|
10
|
-
|
11
|
-
def self.find(glidenumber)
|
12
|
-
unless query = glidenumber.upcase[/([0-9]{4}-[0-9]{6}-[A-Z]{3})/,1]
|
13
|
-
raise ArgumentError.new("Glidenumber is invalid. Should contain: 1234-123456-ABC")
|
14
|
-
end
|
15
|
-
agent = Mechanize.new
|
16
|
-
agent.post("http://glidenumber.net/glide/public/search/search.jsp",
|
17
|
-
{'X_Resolution' => "1280",
|
18
|
-
'events' => "*",
|
19
|
-
'ftoption' => "&",
|
20
|
-
'go.x' => "0",
|
21
|
-
'go.y' => "0",
|
22
|
-
'keywords' => query,
|
23
|
-
'level0' => "*",
|
24
|
-
'level1' => "*",
|
25
|
-
'maxhits' => "10",
|
26
|
-
'nStart' => "0",
|
27
|
-
'posted' => "0",
|
28
|
-
'process' => "0",
|
29
|
-
'sortby' => "0"}
|
30
|
-
)
|
31
|
-
links = agent.page.parser.css("table tr td.bgLightLight table tr td table tr.bgLightLight[2] a")
|
32
|
-
if links.size != 1
|
33
|
-
if links.empty?
|
34
|
-
raise SearchError.new("Did not find any records matching: #{query}")
|
35
|
-
else
|
36
|
-
raise SearchError.new("Found too many records matching: #{query}")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
link = links.first
|
40
|
-
glidenumber = link.text.strip
|
41
|
-
glidenumber.match /([A-Z]{2})-\d{4}-\d{6}-([A-Z]{3})/
|
42
|
-
disaster_type, country_code = $1, $2
|
43
|
-
|
44
|
-
agent.get(link.attributes["href"].to_s)
|
45
|
-
|
46
|
-
country = agent.page.parser.css("table tr td table.basefontSmall tr[3] td.bgLightLight strong").text.strip.split.last
|
47
|
-
raise ParseError.new("Could not find country on Glidenumber page!") if country == ""
|
48
|
-
date = agent.page.parser.css("table tr td table.basefontSmall tr[5] td.bgLightLight[2]").text.strip
|
49
|
-
date = Date.strptime(date, '%Y-%m-%d')
|
50
|
-
|
51
|
-
Record.new({:country => country,
|
52
|
-
:country_code => country_code,
|
53
|
-
:date => date,
|
54
|
-
:glidenumber => glidenumber,
|
55
|
-
:disaster_type => disaster_type})
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|