reliefweb_scraper 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,26 +5,18 @@ describe Reliefweb do
5
5
  use_vcr_cassette 'reliefweb', :record => :new_episodes
6
6
 
7
7
  it 'should fetch a list of featured disasters' do
8
- Reliefweb.should_receive(:fetch_glidenumber).at_least(:once).with(/^\//).and_return("OT-2011-000110-UGA")
9
- Glidenumber.should_receive(:find).at_least(:once).with("OT-2011-000110-UGA").and_return(
10
- Glidenumber::Record.new(:country => "Uganda",
11
- :date => Date.new(2011, 8, 12),
12
- :glidenumber => "MS-2011-000110-UGA"))
13
-
14
8
  disasters = Reliefweb.featured_disasters
15
9
  disasters.should_not be_empty
16
10
  disaster = disasters.first
17
11
  disaster.should be_a(Reliefweb::Disaster)
18
- disaster.url.should == "http://reliefweb.int/horn-africa-crisis2011"
19
- disaster.country.should == "Uganda"
20
- disaster.date.should == Date.new(2011, 8, 12)
21
- disaster.glidenumber.should == 'MS-2011-000110-UGA'
22
- disaster.title.should == 'Horn of Africa Crisis'
12
+ disaster.url.should == "http://reliefweb.int/disaster/tc-2012-000036-mdg"
13
+ disaster.country_code.should == "mdg, moz"
14
+ disaster.date.should == Time.parse('Mon, 05 Mar 2012 00:00:00 +0000')
15
+ disaster.glidenumber.should == 'TC-2012-000036-MDG'
16
+ disaster.title.should == 'Tropical Storm Irina'
17
+ disaster.current.should be_true
18
+ disaster.disaster_type.should == "Tropical Cyclone, Flood, Violent Wind"
23
19
  end
24
20
 
25
- it 'should parse a glide number from a disaster page' do
26
- glidenumber = Reliefweb.fetch_glidenumber("http://reliefweb.int/taxonomy/term/8956")
27
- glidenumber.should == "MS-2011-000110-UGA"
28
- end
29
21
  end
30
22
 
metadata CHANGED
@@ -1,177 +1,160 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: reliefweb_scraper
3
- version: !ruby/object:Gem::Version
4
- hash: 17
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 3
9
- - 1
10
- version: 0.3.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Nathan Broadbent
14
9
  - Stanley Lau
15
10
  - Stephen Kenworthy
16
11
  autorequire:
17
12
  bindir: bin
18
13
  cert_chain: []
19
-
20
- date: 2011-10-12 00:00:00 +08:00
21
- default_executable:
22
- dependencies:
23
- - !ruby/object:Gem::Dependency
24
- name: mechanize
25
- prerelease: false
26
- requirement: &id001 !ruby/object:Gem::Requirement
14
+ date: 2012-03-24 00:00:00.000000000Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: nokogiri
18
+ requirement: !ruby/object:Gem::Requirement
27
19
  none: false
28
- requirements:
29
- - - ">="
30
- - !ruby/object:Gem::Version
31
- hash: 13
32
- segments:
33
- - 2
34
- - 0
35
- - 1
36
- version: 2.0.1
20
+ requirements:
21
+ - - ! '>='
22
+ - !ruby/object:Gem::Version
23
+ version: 1.5.0
37
24
  type: :runtime
38
- version_requirements: *id001
39
- - !ruby/object:Gem::Dependency
40
- name: hashie
41
25
  prerelease: false
42
- requirement: &id002 !ruby/object:Gem::Requirement
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ! '>='
30
+ - !ruby/object:Gem::Version
31
+ version: 1.5.0
32
+ - !ruby/object:Gem::Dependency
33
+ name: hashie
34
+ requirement: !ruby/object:Gem::Requirement
43
35
  none: false
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- hash: 19
48
- segments:
49
- - 1
50
- - 1
51
- - 0
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
52
39
  version: 1.1.0
53
40
  type: :runtime
54
- version_requirements: *id002
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
41
  prerelease: false
58
- requirement: &id003 !ruby/object:Gem::Requirement
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.1.0
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ requirement: !ruby/object:Gem::Requirement
59
51
  none: false
60
- requirements:
52
+ requirements:
61
53
  - - ~>
62
- - !ruby/object:Gem::Version
63
- hash: 27
64
- segments:
65
- - 2
66
- - 5
67
- - 0
54
+ - !ruby/object:Gem::Version
68
55
  version: 2.5.0
69
56
  type: :development
70
- version_requirements: *id003
71
- - !ruby/object:Gem::Dependency
72
- name: vcr
73
57
  prerelease: false
74
- requirement: &id004 !ruby/object:Gem::Requirement
58
+ version_requirements: !ruby/object:Gem::Requirement
75
59
  none: false
76
- requirements:
60
+ requirements:
77
61
  - - ~>
78
- - !ruby/object:Gem::Version
79
- hash: 57
80
- segments:
81
- - 1
82
- - 11
83
- - 1
62
+ - !ruby/object:Gem::Version
63
+ version: 2.5.0
64
+ - !ruby/object:Gem::Dependency
65
+ name: vcr
66
+ requirement: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
84
71
  version: 1.11.1
85
72
  type: :development
86
- version_requirements: *id004
87
- - !ruby/object:Gem::Dependency
88
- name: webmock
89
73
  prerelease: false
90
- requirement: &id005 !ruby/object:Gem::Requirement
74
+ version_requirements: !ruby/object:Gem::Requirement
91
75
  none: false
92
- requirements:
76
+ requirements:
93
77
  - - ~>
94
- - !ruby/object:Gem::Version
95
- hash: 11
96
- segments:
97
- - 1
98
- - 7
99
- - 0
78
+ - !ruby/object:Gem::Version
79
+ version: 1.11.1
80
+ - !ruby/object:Gem::Dependency
81
+ name: webmock
82
+ requirement: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
100
87
  version: 1.7.0
101
88
  type: :development
102
- version_requirements: *id005
103
- - !ruby/object:Gem::Dependency
104
- name: awesome_print
105
89
  prerelease: false
106
- requirement: &id006 !ruby/object:Gem::Requirement
90
+ version_requirements: !ruby/object:Gem::Requirement
107
91
  none: false
108
- requirements:
92
+ requirements:
109
93
  - - ~>
110
- - !ruby/object:Gem::Version
111
- hash: 15
112
- segments:
113
- - 0
114
- - 4
115
- - 0
94
+ - !ruby/object:Gem::Version
95
+ version: 1.7.0
96
+ - !ruby/object:Gem::Dependency
97
+ name: awesome_print
98
+ requirement: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
116
103
  version: 0.4.0
117
104
  type: :development
118
- version_requirements: *id006
119
- description: Scrapes reliefweb's featured disasters, and pulls additional information from glidenumber.net
120
- email:
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ~>
110
+ - !ruby/object:Gem::Version
111
+ version: 0.4.0
112
+ description: Gathers disasters from Reliefweb via RSS and presents them as a Disaster
113
+ hashie
114
+ email:
121
115
  - itdept@crossroads.org.hk
122
116
  executables: []
123
-
124
117
  extensions: []
125
-
126
118
  extra_rdoc_files: []
127
-
128
- files:
119
+ files:
129
120
  - .gitignore
130
121
  - .rspec
131
122
  - Gemfile
132
123
  - README.md
133
124
  - Rakefile
134
- - lib/glidenumber.rb
135
125
  - lib/reliefweb.rb
136
126
  - reliefweb_scraper.gemspec
137
- - spec/fixtures/vcr_cassettes/glidenumber.yml
138
127
  - spec/fixtures/vcr_cassettes/reliefweb.yml
139
- - spec/lib/glidenumber_spec.rb
140
128
  - spec/lib/reliefweb_spec.rb
141
129
  - spec/spec_helper.rb
142
- has_rdoc: true
143
130
  homepage: http://www.crossroads.org.hk
144
131
  licenses: []
145
-
146
132
  post_install_message:
147
133
  rdoc_options: []
148
-
149
- require_paths:
134
+ require_paths:
150
135
  - lib
151
- required_ruby_version: !ruby/object:Gem::Requirement
136
+ required_ruby_version: !ruby/object:Gem::Requirement
152
137
  none: false
153
- requirements:
154
- - - ">="
155
- - !ruby/object:Gem::Version
156
- hash: 3
157
- segments:
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ segments:
158
143
  - 0
159
- version: "0"
160
- required_rubygems_version: !ruby/object:Gem::Requirement
144
+ hash: -475121041
145
+ required_rubygems_version: !ruby/object:Gem::Requirement
161
146
  none: false
162
- requirements:
163
- - - ">="
164
- - !ruby/object:Gem::Version
165
- hash: 3
166
- segments:
147
+ requirements:
148
+ - - ! '>='
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ segments:
167
152
  - 0
168
- version: "0"
153
+ hash: -475121041
169
154
  requirements: []
170
-
171
155
  rubyforge_project:
172
- rubygems_version: 1.6.2
156
+ rubygems_version: 1.8.20
173
157
  signing_key:
174
158
  specification_version: 3
175
- summary: Scrape Reliefweb featured disasters
159
+ summary: Feeds disasters from Reliefweb
176
160
  test_files: []
177
-
data/lib/glidenumber.rb DELETED
@@ -1,58 +0,0 @@
1
- require 'date'
2
- require 'hashie'
3
- require 'mechanize'
4
-
5
- module Glidenumber
6
- class SearchError < StandardError; end
7
- class ParseError < StandardError; end
8
-
9
- class Record < Hashie::Mash; end
10
-
11
- def self.find(glidenumber)
12
- unless query = glidenumber.upcase[/([0-9]{4}-[0-9]{6}-[A-Z]{3})/,1]
13
- raise ArgumentError.new("Glidenumber is invalid. Should contain: 1234-123456-ABC")
14
- end
15
- agent = Mechanize.new
16
- agent.post("http://glidenumber.net/glide/public/search/search.jsp",
17
- {'X_Resolution' => "1280",
18
- 'events' => "*",
19
- 'ftoption' => "&",
20
- 'go.x' => "0",
21
- 'go.y' => "0",
22
- 'keywords' => query,
23
- 'level0' => "*",
24
- 'level1' => "*",
25
- 'maxhits' => "10",
26
- 'nStart' => "0",
27
- 'posted' => "0",
28
- 'process' => "0",
29
- 'sortby' => "0"}
30
- )
31
- links = agent.page.parser.css("table tr td.bgLightLight table tr td table tr.bgLightLight[2] a")
32
- if links.size != 1
33
- if links.empty?
34
- raise SearchError.new("Did not find any records matching: #{query}")
35
- else
36
- raise SearchError.new("Found too many records matching: #{query}")
37
- end
38
- end
39
- link = links.first
40
- glidenumber = link.text.strip
41
- glidenumber.match /([A-Z]{2})-\d{4}-\d{6}-([A-Z]{3})/
42
- disaster_type, country_code = $1, $2
43
-
44
- agent.get(link.attributes["href"].to_s)
45
-
46
- country = agent.page.parser.css("table tr td table.basefontSmall tr[3] td.bgLightLight strong").text.strip.split.last
47
- raise ParseError.new("Could not find country on Glidenumber page!") if country == ""
48
- date = agent.page.parser.css("table tr td table.basefontSmall tr[5] td.bgLightLight[2]").text.strip
49
- date = Date.strptime(date, '%Y-%m-%d')
50
-
51
- Record.new({:country => country,
52
- :country_code => country_code,
53
- :date => date,
54
- :glidenumber => glidenumber,
55
- :disaster_type => disaster_type})
56
- end
57
- end
58
-