semantic-crawler 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,15 +10,19 @@ module SemanticCrawler
10
10
  "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
11
11
  }
12
12
 
13
+ # @attribute [r]
13
14
  # The input latitude value
14
15
  attr_reader :latitude
15
16
 
17
+ # @attribute [r]
16
18
  # The input longitude value
17
19
  attr_reader :longitude
18
20
 
21
+ # @attribute [r]
19
22
  # The input radius value in meters
20
23
  attr_reader :radius
21
24
 
25
+ # @attribute [r]
22
26
  # The linkedgeodata.org link to the relevant nodes
23
27
  attr_reader :url
24
28
 
@@ -41,6 +45,7 @@ module SemanticCrawler
41
45
 
42
46
  # Returns an array of SemanticCrawler::LinkedGeoData::RelevantNode
43
47
  # objects
48
+ # @return [Array<SemanticCrawler::LinkedGeoData::RelevantNode>]
44
49
  def relevant_nodes
45
50
  nodeset = query_root_node("rdf:Description", @@NAMESPACES)
46
51
  @items = []
@@ -61,6 +66,7 @@ module SemanticCrawler
61
66
  end
62
67
 
63
68
  # Outputs the document as XML
69
+ # @return [String] The document serialized as XML
64
70
  def xml_document
65
71
  @root_node.to_s
66
72
  end
@@ -1,5 +1,5 @@
1
1
  module SemanticCrawler
2
2
 
3
3
  # The current version of this library.
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
data/log/.gitkeep ADDED
File without changes
@@ -0,0 +1,35 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+
3
+ # Maintain your gem's version:
4
+ require "semantic_crawler/version"
5
+
6
+ # Describe your gem and declare its dependencies:
7
+ Gem::Specification.new do |s|
8
+ s.name = "semantic-crawler"
9
+ s.version = SemanticCrawler::VERSION
10
+ s.authors = ["Alex Oberhauser"]
11
+ s.email = ["alex.oberhauser@sigimera.org"]
12
+ s.homepage = "https://github.com/obale/semantic_crawler"
13
+ s.summary = "SemanticCrawler is a ruby library that encapsulates data gathering from different sources."
14
+ s.description = "SemanticCrawler is a ruby library that encapsulates data gathering from different sources. Currently country information from Factbook and FAO (Food and Agriculture Organization of the United Nations), crisis information from GDACS.org and geo data from LinkedGeoData are supported. Additional the GeoNames module allows to get Factbook and FAO country information from GPS coordinates."
15
+
16
+ #s.files = Dir["{app,config,db,lib,log}/**/*"] + ["MIT-LICENSE", "Rakefile", "README.rdoc"]
17
+ s.files = `git ls-files`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } # double quotes required: '\n' is a literal backslash + n and would never split the file list
19
+ s.test_files = Dir["{test,spec}/**/*"]
20
+ s.require_paths = ['lib']
21
+
22
+ s.add_dependency "httparty"
23
+ s.add_dependency "json"
24
+ #s.add_dependency "google-api-client" # Freebase API access
25
+ s.add_dependency "nokogiri" # XML Parsing
26
+ s.add_dependency "geonames" # Use for the GeoNames module
27
+
28
+ s.add_development_dependency "yard"
29
+ s.add_development_dependency "rails", "~> 3.2.2"
30
+ s.add_development_dependency "sqlite3"
31
+ s.add_development_dependency "rspec-rails"
32
+ s.add_development_dependency "simplecov"
33
+ s.add_development_dependency "simplecov-rcov"
34
+
35
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ describe SemanticCrawler::Dbpedia do
4
+ pending "Implement 'Dbpedia' module"
5
+
6
+ before(:all) do
7
+ end
8
+
9
+ end
@@ -0,0 +1,89 @@
1
+ require 'spec_helper'
2
+
3
+ describe SemanticCrawler::Factbook do
4
+
5
+ before(:all) do
6
+ @austria = SemanticCrawler::Factbook::Country.new("Austria")
7
+ end
8
+
9
+ it "test nil country" do
10
+ wrongCountry = SemanticCrawler::Factbook::Country.new(nil)
11
+ wrongCountry.url.nil?.should == true
12
+ end
13
+
14
+ it "test unknown country" do
15
+ wrongCountry = SemanticCrawler::Factbook::Country.new("xyz")
16
+ wrongCountry.landboundary.nil?.should == true
17
+ wrongCountry.background.nil?.should == true
18
+ wrongCountry.name.nil?.should == true
19
+ end
20
+
21
+ it "init austria country information" do
22
+ be_valid @austria.country_name.eql?("Austria")
23
+ be_valid @austria.url.eql?("http://www4.wiwiss.fu-berlin.de/factbook/data/austria")
24
+ end
25
+
26
+ it "get doc dump" do
27
+ be_valid !@austria.xml_document.empty?
28
+ end
29
+
30
+ it "get austria name" do
31
+ value = @austria.name
32
+ be_valid value.to_s.eql?("Austria")
33
+ end
34
+
35
+ it "get austria background information" do
36
+ value = @austria.background
37
+ be_valid !value.nil?
38
+ end
39
+
40
+ it "get austria population information" do
41
+ value = @austria.population_total
42
+ be_valid !value.nil?
43
+ end
44
+
45
+ it "get austria legalsystem property (not wrapped)" do
46
+ value = @austria.get_factbook_property("legalsystem")
47
+ be_valid !value.nil?
48
+ end
49
+
50
+ it "get austria latitude" do
51
+ value = @austria.latitude
52
+ be_valid value.to_s.eql?("47.3333333333333")
53
+ end
54
+
55
+ it "get austria longitude" do
56
+ value = @austria.longitude
57
+ be_valid value.to_s.eql?("13.3333333333333")
58
+ end
59
+
60
+ it "get austria landboundary" do
61
+ value = @austria.landboundary
62
+ be_valid value.count == 7
63
+ end
64
+
65
+ it "get austria terrain" do
66
+ value = @austria.terrain
67
+ be_valid !value.nil?
68
+ end
69
+
70
+ it "get austria total airports" do
71
+ value = @austria.airports_total
72
+ be_valid value.to_s.eql?("55")
73
+ end
74
+
75
+ it "get austria heliports" do
76
+ value = @austria.heliports
77
+ be_valid value.to_s.eql?("1")
78
+ end
79
+
80
+ it "get austria climate" do
81
+ value = @austria.climate
82
+ be_valid !value.nil?
83
+ end
84
+
85
+ it "get austria location description" do
86
+ value = @austria.location
87
+ be_valid !value.nil?
88
+ end
89
+ end
@@ -0,0 +1,118 @@
1
+ require 'spec_helper'
2
+
3
+ describe SemanticCrawler::Fao do
4
+
5
+ before(:all) do
6
+ @obj = SemanticCrawler::Fao::Country.new("Austria")
7
+ end
8
+
9
+ it "init empty country object" do
10
+ @obj = SemanticCrawler::Fao::Country.new(nil)
11
+ @obj.country_name.should.eql?(nil)
12
+ @obj.url.should eq(nil)
13
+ end
14
+
15
+ it "check austria object" do
16
+ @obj.country_name.should.eql?("Austria")
17
+ @obj.url.should eq("http://www.fao.org/countryprofiles/geoinfo/geopolitical/data/Austria")
18
+ end
19
+
20
+ it "get austria RDF/XML dump" do
21
+ be_valid !@obj.xml_document.empty?
22
+ end
23
+
24
+ it "get austria type" do
25
+ @obj.type_url.should eq("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/self_governing")
26
+ end
27
+
28
+ it "get austria location (max/min lat, max/min long)" do
29
+ @obj.max_latitude.should eq("49.02")
30
+ @obj.max_longitude.should eq("17.16")
31
+ @obj.min_latitude.should eq("46.38")
32
+ @obj.min_longitude.should eq("9.54")
33
+ end
34
+
35
+ it "get land area" do
36
+ @obj.land_area_notes.should eq("Official data reported on FAO Questionnaires from countries")
37
+ @obj.land_area_total.should eq("8243.5")
38
+ @obj.land_area_unit.should eq("1000 Ha")
39
+ @obj.land_area_year.should eq("2009")
40
+ end
41
+
42
+ it "get currency in en/es/fr" do
43
+ @obj.name_currency("en").should eq("euro")
44
+ @obj.name_currency("es").should eq("euro")
45
+ @obj.name_currency("fr").should eq("euro")
46
+ end
47
+
48
+ it "get official name in en/es/fr" do
49
+ @obj.official_name("en").should eq("the Republic of Austria")
50
+ @obj.official_name("es").should eq("la Rep&#xFA;blica de Austria")
51
+ @obj.official_name("fr").should eq("la R&#xE9;publique d'Autriche")
52
+ end
53
+
54
+ it "get is in group (URL)" do
55
+ group = @obj.is_in_group_url
56
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/EU").should be_true
57
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Europe").should be_true
58
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/FAO").should be_true
59
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/World").should be_true
60
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/western_Europe").should be_true
61
+ end
62
+
63
+ it "get is in group (Name)" do
64
+ group = @obj.is_in_group_name
65
+ group.include?("EU").should be_true
66
+ group.include?("Europe").should be_true
67
+ group.include?("FAO").should be_true
68
+ group.include?("World").should be_true
69
+ group.include?("western_Europe").should be_true
70
+ end
71
+
72
+ it "get has boarder with (URL)" do
73
+ group = @obj.has_boarder_with_url
74
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Czech_Republic_the").should be_true
75
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Germany").should be_true
76
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Hungary").should be_true
77
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Italy").should be_true
78
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Liechtenstein").should be_true
79
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Slovakia").should be_true
80
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Slovenia").should be_true
81
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Switzerland").should be_true
82
+ end
83
+
84
+ it "get has boarder with (Name)" do
85
+ group = @obj.has_boarder_with_name
86
+ group.include?("Czech_Republic_the").should be_true
87
+ group.include?("Germany").should be_true
88
+ group.include?("Hungary").should be_true
89
+ group.include?("Italy").should be_true
90
+ group.include?("Liechtenstein").should be_true
91
+ group.include?("Slovakia").should be_true
92
+ group.include?("Slovenia").should be_true
93
+ group.include?("Switzerland").should be_true
94
+ end
95
+
96
+ it "get same as links" do
97
+ links = @obj.same_as
98
+ links.include?("http://aims.fao.org/aos/agrovoc/c_718").should be_true
99
+ links.include?("http://dbpedia.org/resource/Austria").should be_true
100
+ end
101
+
102
+ it "get population information (notes/total/unit/year)" do
103
+ @obj.population_notes.should eq("2010 Revision from the UN Population Division")
104
+ @obj.population_total.should eq("8394.0")
105
+ @obj.population_unit.should eq("1000")
106
+ @obj.population_year.should eq("2010")
107
+ end
108
+
109
+ it "get valid since/until" do
110
+ @obj.valid_since.should eq("1985")
111
+ @obj.valid_until.should eq("9999")
112
+ end
113
+
114
+ it "get dbpedia id" do
115
+ @obj.code_dbpedia_id.should eq("Austria")
116
+ end
117
+
118
+ end
@@ -0,0 +1,97 @@
1
+ require 'spec_helper'
2
+
3
+ describe SemanticCrawler::Fao do
4
+
5
+ before(:all) do
6
+ @obj = SemanticCrawler::Fao::Country.new("Papua New Guinea")
7
+ end
8
+
9
+ it "check papua new guinea object" do
10
+ @obj.country_name.should eq("Papua New Guinea")
11
+ @obj.url.should eq("http://www.fao.org/countryprofiles/geoinfo/geopolitical/data/Papua_New_Guinea")
12
+ end
13
+
14
+ it "get papua new guinea RDF/XML dump" do
15
+ be_valid !@obj.xml_document.empty?
16
+ end
17
+
18
+ it "get papa new guinea type" do
19
+ @obj.type_url.should eq("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/self_governing")
20
+ end
21
+
22
+ it "get papa new guinea location (max/min lat, max/min long)" do
23
+ @obj.max_latitude.should eq("-1.13")
24
+ @obj.max_longitude.should eq("159.49")
25
+ @obj.min_latitude.should eq("-11.66")
26
+ @obj.min_longitude.should eq("140.87")
27
+ end
28
+
29
+ it "get land area" do
30
+ @obj.land_area_notes.should eq("Manual Estimation")
31
+ @obj.land_area_total.should eq("45286.0")
32
+ @obj.land_area_unit.should eq("1000 Ha")
33
+ @obj.land_area_year.should eq("2009")
34
+ end
35
+
36
+ it "get currency in en/es/fr" do
37
+ @obj.name_currency("en").should eq("kina")
38
+ @obj.name_currency("es").should eq("kina")
39
+ @obj.name_currency("fr").should eq("kina")
40
+ end
41
+
42
+ it "get official name in en/es/fr" do
43
+ @obj.official_name("en").should eq("Papua New Guinea")
44
+ @obj.official_name("es").should eq("Papua Nueva Guinea")
45
+ @obj.official_name("fr").should eq("la Papouasie-Nouvelle-Guin&#xE9;e")
46
+ end
47
+
48
+ it "get is in group (URL)" do
49
+ group = @obj.is_in_group_url
50
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Oceania").should be_true
51
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Melanesia").should be_true
52
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/FAO").should be_true
53
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/World").should be_true
54
+ end
55
+
56
+ it "get is in group (Name)" do
57
+ group = @obj.is_in_group_name
58
+ group.include?("Oceania").should be_true
59
+ group.include?("Melanesia").should be_true
60
+ group.include?("FAO").should be_true
61
+ group.include?("World").should be_true
62
+ end
63
+
64
+ it "get has boarder with (URL)" do
65
+ group = @obj.has_boarder_with_url
66
+ group.include?("http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/Indonesia").should be_true
67
+ end
68
+
69
+ it "get has boarder with (Name)" do
70
+ group = @obj.has_boarder_with_name
71
+ group.include?("Indonesia").should be_true
72
+ end
73
+
74
+ it "get same as links" do
75
+ links = @obj.same_as
76
+ links.include?("http://aims.fao.org/aos/agrovoc/c_5555").should be_true
77
+ links.include?("http://dbpedia.org/resource/Papua_New_Guinea").should be_true
78
+ end
79
+
80
+ it "get population information (notes/total/unit/year)" do
81
+ @obj.population_notes.should eq("2010 Revision from the UN Population Division")
82
+ @obj.population_total.should eq("6858.0")
83
+ @obj.population_unit.should eq("1000")
84
+ @obj.population_year.should eq("2010")
85
+ end
86
+
87
+ it "get valid since/until" do
88
+ @obj.valid_since.should eq("1985")
89
+ @obj.valid_until.should eq("9999")
90
+ end
91
+
92
+ it "get dbpedia id" do
93
+ @obj.code_dbpedia_id.should eq("Papua_New_Guinea")
94
+ end
95
+
96
+ end
97
+
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+ describe SemanticCrawler::Freebase do
4
+
5
+ it "test empty country" do
6
+ emptyCountry = SemanticCrawler::Freebase::Country.new(nil)
7
+ emptyCountry.get.nil?.should == true
8
+ end
9
+
10
+ it "init austria country information" do
11
+ austria = SemanticCrawler::Freebase::Country.new("Austria")
12
+ austria.get.to_s.nil?.should == false
13
+ end
14
+
15
+ pending "Implement the 'Freebase' module"
16
+
17
+ end
@@ -0,0 +1,111 @@
1
+ require 'spec_helper'
2
+
3
+ describe SemanticCrawler::Gdacs do
4
+ before(:all) do
5
+ @feed = SemanticCrawler::Gdacs::Feed.new
6
+ end
7
+
8
+ it "test wrong url" do
9
+ noFeed = SemanticCrawler::Gdacs::Feed.new(nil)
10
+ be_valid noFeed.nil?
11
+ end
12
+
13
+ it "init gdacs feed" do
14
+ be_valid !@feed.url.to_s.empty?
15
+ end
16
+
17
+ it "get channel title" do
18
+ be_valid !@feed.title.to_s.empty?
19
+ end
20
+
21
+ it "get channel description" do
22
+ be_valid !@feed.description.to_s.empty?
23
+ end
24
+
25
+ it "get channel link" do
26
+ be_valid !@feed.link.to_s.empty?
27
+ end
28
+
29
+ it "get feed dump" do
30
+ be_valid !@feed.xml_document.empty?
31
+ end
32
+
33
+ it "get channel pubDate" do
34
+ be_valid !@feed.pubDate.nil?
35
+ end
36
+
37
+ it "get channel webMaster" do
38
+ be_valid @feed.webMaster.to_s.include?("@")
39
+ end
40
+
41
+ it "get channel managingEditor" do
42
+ be_valid @feed.managingEditor.to_s.include?("@")
43
+ end
44
+
45
+ it "get channel atom:link" do
46
+ be_valid @feed.atom_link.to_s.start_with?("http")
47
+ end
48
+
49
+ it "get channel items + resources" do
50
+ items = @feed.items
51
+ be_valid !items.nil?
52
+ items.each do |item|
53
+ be_valid !item.title.to_s.empty?
54
+ be_valid !item.description.nil?
55
+ be_valid !item.link.to_s.empty?
56
+ be_valid !item.pubDate.nil?
57
+ be_valid !item.fromdate.nil?
58
+ be_valid !item.todate.nil?
59
+ be_valid !item.subject.nil?
60
+ be_valid !item.guid.nil?
61
+ be_valid !item.latitude.to_s.empty?
62
+ be_valid !item.longitude.to_s.empty?
63
+ be_valid !item.version.nil?
64
+ be_valid !item.eventtype.nil?
65
+ be_valid !item.alertlevel.nil?
66
+ be_valid !item.eventname.nil?
67
+ be_valid !item.eventid.nil?
68
+ be_valid !item.episodeid.nil?
69
+ be_valid !item.severity.nil?
70
+ be_valid !item.population.nil?
71
+ be_valid !item.vulnerability.nil?
72
+ be_valid !item.country.nil?
73
+ be_valid !item.iso3.nil?
74
+ be_valid !item.glide.nil?
75
+ be_valid !item.enclosure_url.nil?
76
+ be_valid !item.enclosure_type.nil?
77
+ be_valid !item.enclosure_length.nil?
78
+ resources = item.resources
79
+ resources.each do |resource|
80
+ be_valid !resource.title.to_s.empty?
81
+ be_valid !resource.url.to_s.empty?
82
+ be_valid !resource.type.to_s.empty?
83
+ be_valid !resource.description.nil?
84
+ be_valid !resource.accesslevel.nil?
85
+ be_valid !resource.xslt.nil?
86
+ be_valid !resource.acknowledgements.nil?
87
+ end
88
+ end
89
+ end
90
+
91
+ it "test emergency feed" do
92
+ noFeed = SemanticCrawler::Gdacs::EmergencyFeed.new(nil)
93
+ be_valid noFeed.nil?
94
+
95
+ @emergency_feed = SemanticCrawler::Gdacs::EmergencyFeed.new
96
+ be_valid !@emergency_feed.xml_document.empty?
97
+ @emergency_feed.title.to_s.should_not be_empty
98
+ @emergency_feed.description.to_s.should_not be_empty
99
+ items = @emergency_feed.items
100
+ items.each do |item|
101
+ item.link.to_s.should_not be_empty
102
+ item.title.to_s.should_not be_empty
103
+ item.eventType.to_s.should_not be_empty
104
+ item.pubDate.to_s.should_not be_empty
105
+ item.lastUpdate.to_s.should_not be_empty
106
+ item.glide.to_s.should_not be_empty
107
+ end
108
+
109
+ end
110
+
111
+ end