reliefweb_scraper 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,26 +5,18 @@ describe Reliefweb do
5
5
  use_vcr_cassette 'reliefweb', :record => :new_episodes
6
6
 
7
7
  it 'should fetch a list of featured disasters' do
8
- Reliefweb.should_receive(:fetch_glidenumber).at_least(:once).with(/^\//).and_return("OT-2011-000110-UGA")
9
- Glidenumber.should_receive(:find).at_least(:once).with("OT-2011-000110-UGA").and_return(
10
- Glidenumber::Record.new(:country => "Uganda",
11
- :date => Date.new(2011, 8, 12),
12
- :glidenumber => "MS-2011-000110-UGA"))
13
-
14
8
  disasters = Reliefweb.featured_disasters
15
9
  disasters.should_not be_empty
16
10
  disaster = disasters.first
17
11
  disaster.should be_a(Reliefweb::Disaster)
18
- disaster.url.should == "http://reliefweb.int/horn-africa-crisis2011"
19
- disaster.country.should == "Uganda"
20
- disaster.date.should == Date.new(2011, 8, 12)
21
- disaster.glidenumber.should == 'MS-2011-000110-UGA'
22
- disaster.title.should == 'Horn of Africa Crisis'
12
+ disaster.url.should == "http://reliefweb.int/disaster/tc-2012-000036-mdg"
13
+ disaster.country_code.should == "mdg, moz"
14
+ disaster.date.should == Time.parse('Mon, 05 Mar 2012 00:00:00 +0000')
15
+ disaster.glidenumber.should == 'TC-2012-000036-MDG'
16
+ disaster.title.should == 'Tropical Storm Irina'
17
+ disaster.current.should be_true
18
+ disaster.disaster_type.should == "Tropical Cyclone, Flood, Violent Wind"
23
19
  end
24
20
 
25
- it 'should parse a glide number from a disaster page' do
26
- glidenumber = Reliefweb.fetch_glidenumber("http://reliefweb.int/taxonomy/term/8956")
27
- glidenumber.should == "MS-2011-000110-UGA"
28
- end
29
21
  end
30
22
 
metadata CHANGED
@@ -1,177 +1,160 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: reliefweb_scraper
3
- version: !ruby/object:Gem::Version
4
- hash: 17
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 3
9
- - 1
10
- version: 0.3.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Nathan Broadbent
14
9
  - Stanley Lau
15
10
  - Stephen Kenworthy
16
11
  autorequire:
17
12
  bindir: bin
18
13
  cert_chain: []
19
-
20
- date: 2011-10-12 00:00:00 +08:00
21
- default_executable:
22
- dependencies:
23
- - !ruby/object:Gem::Dependency
24
- name: mechanize
25
- prerelease: false
26
- requirement: &id001 !ruby/object:Gem::Requirement
14
+ date: 2012-03-24 00:00:00.000000000Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: nokogiri
18
+ requirement: !ruby/object:Gem::Requirement
27
19
  none: false
28
- requirements:
29
- - - ">="
30
- - !ruby/object:Gem::Version
31
- hash: 13
32
- segments:
33
- - 2
34
- - 0
35
- - 1
36
- version: 2.0.1
20
+ requirements:
21
+ - - ! '>='
22
+ - !ruby/object:Gem::Version
23
+ version: 1.5.0
37
24
  type: :runtime
38
- version_requirements: *id001
39
- - !ruby/object:Gem::Dependency
40
- name: hashie
41
25
  prerelease: false
42
- requirement: &id002 !ruby/object:Gem::Requirement
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ! '>='
30
+ - !ruby/object:Gem::Version
31
+ version: 1.5.0
32
+ - !ruby/object:Gem::Dependency
33
+ name: hashie
34
+ requirement: !ruby/object:Gem::Requirement
43
35
  none: false
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- hash: 19
48
- segments:
49
- - 1
50
- - 1
51
- - 0
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
52
39
  version: 1.1.0
53
40
  type: :runtime
54
- version_requirements: *id002
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
41
  prerelease: false
58
- requirement: &id003 !ruby/object:Gem::Requirement
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.1.0
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ requirement: !ruby/object:Gem::Requirement
59
51
  none: false
60
- requirements:
52
+ requirements:
61
53
  - - ~>
62
- - !ruby/object:Gem::Version
63
- hash: 27
64
- segments:
65
- - 2
66
- - 5
67
- - 0
54
+ - !ruby/object:Gem::Version
68
55
  version: 2.5.0
69
56
  type: :development
70
- version_requirements: *id003
71
- - !ruby/object:Gem::Dependency
72
- name: vcr
73
57
  prerelease: false
74
- requirement: &id004 !ruby/object:Gem::Requirement
58
+ version_requirements: !ruby/object:Gem::Requirement
75
59
  none: false
76
- requirements:
60
+ requirements:
77
61
  - - ~>
78
- - !ruby/object:Gem::Version
79
- hash: 57
80
- segments:
81
- - 1
82
- - 11
83
- - 1
62
+ - !ruby/object:Gem::Version
63
+ version: 2.5.0
64
+ - !ruby/object:Gem::Dependency
65
+ name: vcr
66
+ requirement: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
84
71
  version: 1.11.1
85
72
  type: :development
86
- version_requirements: *id004
87
- - !ruby/object:Gem::Dependency
88
- name: webmock
89
73
  prerelease: false
90
- requirement: &id005 !ruby/object:Gem::Requirement
74
+ version_requirements: !ruby/object:Gem::Requirement
91
75
  none: false
92
- requirements:
76
+ requirements:
93
77
  - - ~>
94
- - !ruby/object:Gem::Version
95
- hash: 11
96
- segments:
97
- - 1
98
- - 7
99
- - 0
78
+ - !ruby/object:Gem::Version
79
+ version: 1.11.1
80
+ - !ruby/object:Gem::Dependency
81
+ name: webmock
82
+ requirement: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
100
87
  version: 1.7.0
101
88
  type: :development
102
- version_requirements: *id005
103
- - !ruby/object:Gem::Dependency
104
- name: awesome_print
105
89
  prerelease: false
106
- requirement: &id006 !ruby/object:Gem::Requirement
90
+ version_requirements: !ruby/object:Gem::Requirement
107
91
  none: false
108
- requirements:
92
+ requirements:
109
93
  - - ~>
110
- - !ruby/object:Gem::Version
111
- hash: 15
112
- segments:
113
- - 0
114
- - 4
115
- - 0
94
+ - !ruby/object:Gem::Version
95
+ version: 1.7.0
96
+ - !ruby/object:Gem::Dependency
97
+ name: awesome_print
98
+ requirement: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
116
103
  version: 0.4.0
117
104
  type: :development
118
- version_requirements: *id006
119
- description: Scrapes reliefweb's featured disasters, and pulls additional information from glidenumber.net
120
- email:
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ~>
110
+ - !ruby/object:Gem::Version
111
+ version: 0.4.0
112
+ description: Gathers disasters from Reliefweb via RSS and presents them as a Disaster
113
+ hashie
114
+ email:
121
115
  - itdept@crossroads.org.hk
122
116
  executables: []
123
-
124
117
  extensions: []
125
-
126
118
  extra_rdoc_files: []
127
-
128
- files:
119
+ files:
129
120
  - .gitignore
130
121
  - .rspec
131
122
  - Gemfile
132
123
  - README.md
133
124
  - Rakefile
134
- - lib/glidenumber.rb
135
125
  - lib/reliefweb.rb
136
126
  - reliefweb_scraper.gemspec
137
- - spec/fixtures/vcr_cassettes/glidenumber.yml
138
127
  - spec/fixtures/vcr_cassettes/reliefweb.yml
139
- - spec/lib/glidenumber_spec.rb
140
128
  - spec/lib/reliefweb_spec.rb
141
129
  - spec/spec_helper.rb
142
- has_rdoc: true
143
130
  homepage: http://www.crossroads.org.hk
144
131
  licenses: []
145
-
146
132
  post_install_message:
147
133
  rdoc_options: []
148
-
149
- require_paths:
134
+ require_paths:
150
135
  - lib
151
- required_ruby_version: !ruby/object:Gem::Requirement
136
+ required_ruby_version: !ruby/object:Gem::Requirement
152
137
  none: false
153
- requirements:
154
- - - ">="
155
- - !ruby/object:Gem::Version
156
- hash: 3
157
- segments:
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ segments:
158
143
  - 0
159
- version: "0"
160
- required_rubygems_version: !ruby/object:Gem::Requirement
144
+ hash: -475121041
145
+ required_rubygems_version: !ruby/object:Gem::Requirement
161
146
  none: false
162
- requirements:
163
- - - ">="
164
- - !ruby/object:Gem::Version
165
- hash: 3
166
- segments:
147
+ requirements:
148
+ - - ! '>='
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ segments:
167
152
  - 0
168
- version: "0"
153
+ hash: -475121041
169
154
  requirements: []
170
-
171
155
  rubyforge_project:
172
- rubygems_version: 1.6.2
156
+ rubygems_version: 1.8.20
173
157
  signing_key:
174
158
  specification_version: 3
175
- summary: Scrape Reliefweb featured disasters
159
+ summary: Feeds disasters from Reliefweb
176
160
  test_files: []
177
-
data/lib/glidenumber.rb DELETED
@@ -1,58 +0,0 @@
1
- require 'date'
2
- require 'hashie'
3
- require 'mechanize'
4
-
5
- module Glidenumber
6
- class SearchError < StandardError; end
7
- class ParseError < StandardError; end
8
-
9
- class Record < Hashie::Mash; end
10
-
11
- def self.find(glidenumber)
12
- unless query = glidenumber.upcase[/([0-9]{4}-[0-9]{6}-[A-Z]{3})/,1]
13
- raise ArgumentError.new("Glidenumber is invalid. Should contain: 1234-123456-ABC")
14
- end
15
- agent = Mechanize.new
16
- agent.post("http://glidenumber.net/glide/public/search/search.jsp",
17
- {'X_Resolution' => "1280",
18
- 'events' => "*",
19
- 'ftoption' => "&",
20
- 'go.x' => "0",
21
- 'go.y' => "0",
22
- 'keywords' => query,
23
- 'level0' => "*",
24
- 'level1' => "*",
25
- 'maxhits' => "10",
26
- 'nStart' => "0",
27
- 'posted' => "0",
28
- 'process' => "0",
29
- 'sortby' => "0"}
30
- )
31
- links = agent.page.parser.css("table tr td.bgLightLight table tr td table tr.bgLightLight[2] a")
32
- if links.size != 1
33
- if links.empty?
34
- raise SearchError.new("Did not find any records matching: #{query}")
35
- else
36
- raise SearchError.new("Found too many records matching: #{query}")
37
- end
38
- end
39
- link = links.first
40
- glidenumber = link.text.strip
41
- glidenumber.match /([A-Z]{2})-\d{4}-\d{6}-([A-Z]{3})/
42
- disaster_type, country_code = $1, $2
43
-
44
- agent.get(link.attributes["href"].to_s)
45
-
46
- country = agent.page.parser.css("table tr td table.basefontSmall tr[3] td.bgLightLight strong").text.strip.split.last
47
- raise ParseError.new("Could not find country on Glidenumber page!") if country == ""
48
- date = agent.page.parser.css("table tr td table.basefontSmall tr[5] td.bgLightLight[2]").text.strip
49
- date = Date.strptime(date, '%Y-%m-%d')
50
-
51
- Record.new({:country => country,
52
- :country_code => country_code,
53
- :date => date,
54
- :glidenumber => glidenumber,
55
- :disaster_type => disaster_type})
56
- end
57
- end
58
-