semantic-crawler 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +3 -3
- data/lib/semantic_crawler.rb +1 -0
- data/lib/semantic_crawler/factbook/country.rb +1 -1
- data/lib/semantic_crawler/gdacs/feed.rb +3 -3
- data/lib/semantic_crawler/gdacs/feed_item.rb +26 -1
- data/lib/semantic_crawler/gdacs/kml_file.rb +55 -0
- data/lib/semantic_crawler/version.rb +1 -1
- data/semantic_crawler.gemspec +1 -1
- data/spec/gdacs_spec.rb +13 -0
- data/spec/linked_geo_data_spec.rb +3 -3
- metadata +37 -35
- data/exploitation/freebase.rb +0 -13
data/README.rdoc
CHANGED
@@ -134,8 +134,8 @@ Freebase.com country information:
|
|
134
134
|
|
135
135
|
== Tested with
|
136
136
|
|
137
|
-
* Ruby 1.8.7-p358 and Rails 3.2
|
138
|
-
* Ruby 1.9.3-p125 and Rails 3.2
|
137
|
+
* Ruby 1.8.7-p358 and Rails 3.2
|
138
|
+
* Ruby 1.9.3-p125 and Rails 3.2
|
139
139
|
|
140
140
|
|
141
141
|
== Additional Links
|
@@ -156,6 +156,6 @@ published under MIT license.
|
|
156
156
|
== Warranty
|
157
157
|
|
158
158
|
This software is provided "as is" and without any express or implied
|
159
|
-
warranties, including, without limitation, the implied warranties of
|
159
|
+
warranties, including, without limitation, the implied warranties of
|
160
160
|
merchantability and fitness for a particular purpose.
|
161
161
|
|
data/lib/semantic_crawler.rb
CHANGED
@@ -33,6 +33,7 @@ require "semantic_crawler/gdacs/feed_item.rb"
|
|
33
33
|
require "semantic_crawler/gdacs/resource.rb"
|
34
34
|
require "semantic_crawler/gdacs/emergency_feed.rb"
|
35
35
|
require "semantic_crawler/gdacs/emergency_feed_item.rb"
|
36
|
+
require "semantic_crawler/gdacs/kml_file.rb"
|
36
37
|
|
37
38
|
# DBPedia - module: Dbpedia
|
38
39
|
require "semantic_crawler/dbpedia"
|
@@ -17,13 +17,13 @@ module SemanticCrawler
|
|
17
17
|
}
|
18
18
|
|
19
19
|
# The gdacs.org RSS feed URL. (default:
|
20
|
-
# http://
|
20
|
+
# http://www.gdacs.org/xml/rss.xml)
|
21
21
|
attr_reader :url
|
22
22
|
|
23
23
|
# Initializes the gdacs.org feed URL. If not specified the default
|
24
|
-
# URL (http://
|
24
|
+
# URL (http://www.gdacs.org/xml/rss.xml) is used. Normally the
|
25
25
|
# feed URL should not be changed.
|
26
|
-
def initialize(new_url = "http://
|
26
|
+
def initialize(new_url = "http://www.gdacs.org/xml/rss.xml")
|
27
27
|
@url = new_url
|
28
28
|
@root_node = nil
|
29
29
|
begin
|
@@ -5,7 +5,7 @@ module SemanticCrawler
|
|
5
5
|
# One crisis entity with related resources. Could be one of the
|
6
6
|
# following crisis types:
|
7
7
|
#
|
8
|
-
# * Floods
|
8
|
+
# * Floods
|
9
9
|
# * Earthquakes
|
10
10
|
# * Tropical Cyclones
|
11
11
|
# * Volcanoes
|
@@ -124,16 +124,41 @@ module SemanticCrawler
|
|
124
124
|
query_root_node("gdacs:severity/text()", @@NAMESPACES)
|
125
125
|
end
|
126
126
|
|
127
|
+
# Returns the severity value (without further text)
|
128
|
+
def severity_value
|
129
|
+
query_root_node("gdacs:severity/@value", @@NAMESPACES)
|
130
|
+
end
|
131
|
+
|
132
|
+
# Returns the severity unit (without further text)
|
133
|
+
def severity_unit
|
134
|
+
query_root_node("gdacs:severity/@unit", @@NAMESPACES)
|
135
|
+
end
|
136
|
+
|
127
137
|
# Returns the population as human readable string
|
128
138
|
def population
|
129
139
|
query_root_node("gdacs:population/text()", @@NAMESPACES)
|
130
140
|
end
|
131
141
|
|
142
|
+
# Returns the population value (without further text)
|
143
|
+
def population_value
|
144
|
+
query_root_node("gdacs:population/@value", @@NAMESPACES)
|
145
|
+
end
|
146
|
+
|
147
|
+
# Returns the population unit (without further text)
|
148
|
+
def population_unit
|
149
|
+
query_root_node("gdacs:population/@unit", @@NAMESPACES)
|
150
|
+
end
|
151
|
+
|
132
152
|
# Returns the vulnerability as human readable string
|
133
153
|
def vulnerability
|
134
154
|
query_root_node("gdacs:vulnerability/text()", @@NAMESPACES)
|
135
155
|
end
|
136
156
|
|
157
|
+
# Returns the vulnerability as number
|
158
|
+
def vulnerability_value
|
159
|
+
query_root_node("gdacs:vulnerability/@value", @@NAMESPACES)
|
160
|
+
end
|
161
|
+
|
137
162
|
# Returns the country iso3 code if available
|
138
163
|
def iso3
|
139
164
|
query_root_node("gdacs:iso3/text()", @@NAMESPACES)
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module SemanticCrawler
|
4
|
+
module Gdacs
|
5
|
+
# The GDACS.org KML file contains geo information about the latest
|
6
|
+
# crises.
|
7
|
+
class KmlFile
|
8
|
+
|
9
|
+
# XML namespaces used for the parsing process
|
10
|
+
@@NAMESPACES = {
|
11
|
+
"k" => "http://www.opengis.net/kml/2.2"
|
12
|
+
}
|
13
|
+
|
14
|
+
# The gdacs.org KML file URL. (default:
|
15
|
+
# http://www.gdacs.org/xml/gdacs.kml)
|
16
|
+
attr_reader :url
|
17
|
+
|
18
|
+
# Initializes the gdacs.org KML file URL. If not specified the default
|
19
|
+
# URL (http://www.gdacs.org/xml/gdacs.kml) is used. Normally the
|
20
|
+
# file URL should not be changed.
|
21
|
+
def initialize(new_url = "http://www.gdacs.org/xml/gdacs.kml")
|
22
|
+
@url = new_url
|
23
|
+
@root_node = nil
|
24
|
+
begin
|
25
|
+
fetch_file
|
26
|
+
rescue => e
|
27
|
+
$log.error("Not able to get country information, through exception: #{e}")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def name
|
32
|
+
query_root_node("k:name/text()", @@NAMESPACES)
|
33
|
+
end
|
34
|
+
|
35
|
+
def placemark(crisis_id)
|
36
|
+
query_root_node("//k:Placemark[@id='#{crisis_id}']", @@NAMESPACES)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Query the root node
|
40
|
+
def query_root_node(xpath_query, namespace = {})
|
41
|
+
if !@root_node.nil?
|
42
|
+
@root_node.xpath(xpath_query, namespace)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
# Retrieves the KML file
|
48
|
+
def fetch_file
|
49
|
+
@doc = Nokogiri::XML(open(@url))
|
50
|
+
@root_node = @doc.xpath("/k:kml/k:Document", @@NAMESPACES)
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/semantic_crawler.gemspec
CHANGED
@@ -27,7 +27,7 @@ Gem::Specification.new do |s|
|
|
27
27
|
|
28
28
|
s.add_development_dependency "yard"
|
29
29
|
s.add_development_dependency "grit"
|
30
|
-
s.add_development_dependency "rails", "~> 3.2
|
30
|
+
s.add_development_dependency "rails", "~> 3.2"
|
31
31
|
s.add_development_dependency "sqlite3"
|
32
32
|
s.add_development_dependency "rspec-rails"
|
33
33
|
s.add_development_dependency "simplecov"
|
data/spec/gdacs_spec.rb
CHANGED
@@ -67,8 +67,13 @@ describe SemanticCrawler::Gdacs do
|
|
67
67
|
be_valid !item.eventid.nil?
|
68
68
|
be_valid !item.episodeid.nil?
|
69
69
|
be_valid !item.severity.nil?
|
70
|
+
be_valid !item.severity_value.nil?
|
71
|
+
be_valid !item.severity_unit.nil?
|
70
72
|
be_valid !item.population.nil?
|
73
|
+
be_valid !item.population_value.nil?
|
74
|
+
be_valid !item.population_value.nil?
|
71
75
|
be_valid !item.vulnerability.nil?
|
76
|
+
be_valid !item.vulnerability_value.nil?
|
72
77
|
be_valid !item.country.nil?
|
73
78
|
be_valid !item.iso3.nil?
|
74
79
|
be_valid !item.glide.nil?
|
@@ -89,6 +94,14 @@ describe SemanticCrawler::Gdacs do
|
|
89
94
|
end
|
90
95
|
end
|
91
96
|
|
97
|
+
xit "test kml file" do
|
98
|
+
kmlFile = SemanticCrawler::Gdacs::KmlFile.new
|
99
|
+
kmlFile.name.to_s.should_not be_empty
|
100
|
+
puts kmlFile.placemark("EQ_117653")
|
101
|
+
puts "----"
|
102
|
+
puts kmlFile.placemark("EQ_117653_Intensity")
|
103
|
+
end
|
104
|
+
|
92
105
|
it "test emergency feed" do
|
93
106
|
noFeed = SemanticCrawler::Gdacs::EmergencyFeed.new(nil)
|
94
107
|
be_valid noFeed.nil?
|
@@ -7,17 +7,17 @@ describe SemanticCrawler::LinkedGeoData do
|
|
7
7
|
#@dresden = SemanticCrawler::LinkedGeoData::RelevantNodes.new(51.033333, 13.733333, 1000, "TrafficSignals")
|
8
8
|
end
|
9
9
|
|
10
|
-
|
10
|
+
xit "check lat/long/radius of dresden" do
|
11
11
|
@dresden.latitude.to_s.should eq("51.033333")
|
12
12
|
@dresden.longitude.to_s.should eq("13.733333")
|
13
13
|
@dresden.radius.to_s.should eq("1000")
|
14
14
|
end
|
15
15
|
|
16
|
-
|
16
|
+
xit "output xml dump " do
|
17
17
|
@dresden.xml_document.size.should > 0
|
18
18
|
end
|
19
19
|
|
20
|
-
|
20
|
+
xit "check single relevant node entries" do
|
21
21
|
nodes = @dresden.relevant_nodes
|
22
22
|
nodes.each do |item|
|
23
23
|
item.xml_document.size.should > 0
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: semantic-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: httparty
|
@@ -114,7 +114,7 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - ~>
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 3.2
|
117
|
+
version: '3.2'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -122,7 +122,7 @@ dependencies:
|
|
122
122
|
requirements:
|
123
123
|
- - ~>
|
124
124
|
- !ruby/object:Gem::Version
|
125
|
-
version: 3.2
|
125
|
+
version: '3.2'
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
127
|
name: sqlite3
|
128
128
|
requirement: !ruby/object:Gem::Requirement
|
@@ -206,7 +206,6 @@ files:
|
|
206
206
|
- MIT-LICENSE
|
207
207
|
- README.rdoc
|
208
208
|
- Rakefile
|
209
|
-
- exploitation/freebase.rb
|
210
209
|
- lib/semantic_crawler.rb
|
211
210
|
- lib/semantic_crawler/dbpedia.rb
|
212
211
|
- lib/semantic_crawler/factbook.rb
|
@@ -220,6 +219,7 @@ files:
|
|
220
219
|
- lib/semantic_crawler/gdacs/emergency_feed_item.rb
|
221
220
|
- lib/semantic_crawler/gdacs/feed.rb
|
222
221
|
- lib/semantic_crawler/gdacs/feed_item.rb
|
222
|
+
- lib/semantic_crawler/gdacs/kml_file.rb
|
223
223
|
- lib/semantic_crawler/gdacs/resource.rb
|
224
224
|
- lib/semantic_crawler/geo_names.rb
|
225
225
|
- lib/semantic_crawler/geo_names/country.rb
|
@@ -276,6 +276,7 @@ files:
|
|
276
276
|
- test/semantic_crawler_test.rb
|
277
277
|
- test/test_helper.rb
|
278
278
|
- test/dummy/log/test.log
|
279
|
+
- test/dummy/log/development.log
|
279
280
|
homepage: https://github.com/obale/semantic_crawler
|
280
281
|
licenses:
|
281
282
|
- MIT
|
@@ -291,7 +292,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
291
292
|
version: '0'
|
292
293
|
segments:
|
293
294
|
- 0
|
294
|
-
hash:
|
295
|
+
hash: 1493129874267030689
|
295
296
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
296
297
|
none: false
|
297
298
|
requirements:
|
@@ -300,55 +301,56 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
300
301
|
version: '0'
|
301
302
|
segments:
|
302
303
|
- 0
|
303
|
-
hash:
|
304
|
+
hash: 1493129874267030689
|
304
305
|
requirements: []
|
305
306
|
rubyforge_project:
|
306
|
-
rubygems_version: 1.8.
|
307
|
+
rubygems_version: 1.8.24
|
307
308
|
signing_key:
|
308
309
|
specification_version: 3
|
309
310
|
summary: SemanticCrawler is a ruby library that encapsulates data gathering from different
|
310
311
|
sources.
|
311
312
|
test_files:
|
313
|
+
- test/test_helper.rb
|
312
314
|
- test/semantic_crawler_test.rb
|
313
|
-
- test/dummy/
|
314
|
-
- test/dummy/
|
315
|
-
- test/dummy/
|
316
|
-
- test/dummy/
|
317
|
-
- test/dummy/
|
318
|
-
- test/dummy/
|
319
|
-
- test/dummy/config
|
320
|
-
- test/dummy/config/application.rb
|
315
|
+
- test/dummy/log/test.log
|
316
|
+
- test/dummy/log/development.log
|
317
|
+
- test/dummy/public/422.html
|
318
|
+
- test/dummy/public/favicon.ico
|
319
|
+
- test/dummy/public/500.html
|
320
|
+
- test/dummy/public/404.html
|
321
|
+
- test/dummy/config.ru
|
321
322
|
- test/dummy/config/locales/en.yml
|
322
|
-
- test/dummy/config/environments/test.rb
|
323
323
|
- test/dummy/config/environments/production.rb
|
324
|
+
- test/dummy/config/environments/test.rb
|
324
325
|
- test/dummy/config/environments/development.rb
|
325
|
-
- test/dummy/config/environment.rb
|
326
|
-
- test/dummy/config/database.yml
|
327
326
|
- test/dummy/config/boot.rb
|
328
|
-
- test/dummy/
|
327
|
+
- test/dummy/config/application.rb
|
328
|
+
- test/dummy/config/database.yml
|
329
|
+
- test/dummy/config/environment.rb
|
330
|
+
- test/dummy/config/routes.rb
|
331
|
+
- test/dummy/config/initializers/secret_token.rb
|
332
|
+
- test/dummy/config/initializers/inflections.rb
|
333
|
+
- test/dummy/config/initializers/backtrace_silencers.rb
|
334
|
+
- test/dummy/config/initializers/wrap_parameters.rb
|
335
|
+
- test/dummy/config/initializers/mime_types.rb
|
336
|
+
- test/dummy/config/initializers/session_store.rb
|
329
337
|
- test/dummy/script/rails
|
338
|
+
- test/dummy/README.rdoc
|
330
339
|
- test/dummy/db/development.sqlite3
|
331
340
|
- test/dummy/db/test.sqlite3
|
332
|
-
- test/dummy/public/500.html
|
333
|
-
- test/dummy/public/404.html
|
334
|
-
- test/dummy/public/favicon.ico
|
335
|
-
- test/dummy/public/422.html
|
336
|
-
- test/dummy/config.ru
|
337
341
|
- test/dummy/Rakefile
|
338
|
-
- test/dummy/app/
|
342
|
+
- test/dummy/app/helpers/application_helper.rb
|
339
343
|
- test/dummy/app/assets/stylesheets/application.css
|
340
|
-
- test/dummy/app/
|
344
|
+
- test/dummy/app/assets/javascripts/application.js
|
341
345
|
- test/dummy/app/controllers/application_controller.rb
|
342
|
-
- test/dummy/app/
|
343
|
-
-
|
344
|
-
- test/test_helper.rb
|
345
|
-
- spec/spec_helper.rb
|
346
|
-
- spec/fao_papua_new_guinea_spec.rb
|
347
|
-
- spec/freebase_spec.rb
|
346
|
+
- test/dummy/app/views/layouts/application.html.erb
|
347
|
+
- spec/linked_geo_data_spec.rb
|
348
348
|
- spec/factbook_spec.rb
|
349
|
+
- spec/fao_papua_new_guinea_spec.rb
|
349
350
|
- spec/dbpedia_spec.rb
|
350
|
-
- spec/fao_austria_spec.rb
|
351
351
|
- spec/geo_names_spec.rb
|
352
|
+
- spec/spec_helper.rb
|
353
|
+
- spec/freebase_spec.rb
|
352
354
|
- spec/gdacs_spec.rb
|
353
|
-
- spec/
|
355
|
+
- spec/fao_austria_spec.rb
|
354
356
|
has_rdoc:
|
data/exploitation/freebase.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
require 'cgi'
|
2
|
-
require 'httparty'
|
3
|
-
require 'json'
|
4
|
-
require 'pp'
|
5
|
-
|
6
|
-
#query = [{'id' => '/en/austria', 'name' => nil, 'type' => '/location/country', '*' => [{}] }]
|
7
|
-
query = [{'id' => '/en/austria', 'type' => '/type/property' }]
|
8
|
-
query_envelope = {'query' => query }
|
9
|
-
service_url = 'http://api.freebase.com/api/service/mqlread'
|
10
|
-
url = service_url + '?query=' + CGI::escape(query_envelope.to_json)
|
11
|
-
|
12
|
-
response = HTTParty.get(url, :format => :json)
|
13
|
-
puts response
|