semantic-crawler 0.2.0 → 0.3.0

data/.gitignore ADDED
@@ -0,0 +1,14 @@
+ /doc/
+ /rdoc/
+ *.swp
+ /.bundle/
+ /.yardoc
+ /log/*.log
+ /log/*.log.*
+ /pkg/
+ /test.sh
+ test/dummy/log/*.log
+ test/dummy/tmp/
+ test/dummy/.sass-cache
+ /coverage
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,3 @@
+ --color
+ --profile
+ #--format documentation
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
+ language: ruby
+ rvm:
+ - 1.8.7
+ - 1.9.3
data/.yardopts ADDED
@@ -0,0 +1,5 @@
+ --title "Semantic Crawler Library for Ruby"
+ --output-dir doc/yard
+ --markup rdoc
+ --readme README.rdoc
+ -
data/Gemfile ADDED
@@ -0,0 +1,18 @@
+ source "http://rubygems.org"
+
+ # Declare your gem's dependencies in semantic_crawler.gemspec.
+ # Bundler will treat runtime dependencies like base dependencies, and
+ # development dependencies will be added by default to the :development group.
+ gemspec
+
+ # jquery-rails is used by the dummy application
+ gem "jquery-rails"
+
+ #
+ # Declare any dependencies that are still in development here instead of in
+ # your gemspec. These might include edge Rails or gems from your path or
+ # Git. Remember to move these dependencies to your gemspec before releasing
+ # your gem to rubygems.org.
+
+ # To use debugger
+ # gem 'ruby-debug'
data/README.rdoc CHANGED
@@ -1,4 +1,4 @@
- = SemanticCrawler {<img src="https://secure.travis-ci.org/obale/semantic_crawler.png?branch=master"/>}[http://travis-ci.org/#!/obale/semantic_crawler]
+ = SemanticCrawler
 
  This project encapsulates data gathering from different sources.
  It simplifies the extension of internal data with public available
@@ -8,15 +8,15 @@ to bypass complex NLP (natural language processing).
 
  == Supported Sources
 
- * CIA Factbook RDF Dump, see http://www4.wiwiss.fu-berlin.de/factbook/directory/countries
- * GDACS (see http://gdacs.org)
- * FAO - Food and Agriculture Organization of the United Nations (see http://www.fao.org)
- * LinkedGeoData - LGD (see http://linkedgeodata.org)
- * [Started] Freebase (see http://freebase.com)
+ * {Geonames}[http://www.geonames.org/]
+ * {CIA Factbook RDF Dump}[http://www4.wiwiss.fu-berlin.de/factbook/directory/countries]
+ * {FAO - Food and Agriculture Organization of the United Nations}[http://www.fao.org]
+ * {LinkedGeoData - LGD}[http://linkedgeodata.org]
+ * {GDACS}[http://gdacs.org]
+ * [Started] {Freebase}[http://freebase.com]
 
  === TODO
 
- * Geonames
  * DBPedia
  * Different Government Sources
 
@@ -45,8 +45,25 @@ Or from source:
 
  == Examples
 
+ These examples are only a short outline of how to use the library. For more
+ information, read the documentation or look into the source code. To use the
+ library, include or execute the following line:
+
  >> require "semantic_crawler"
 
+ === GeoNames
+
+ The GeoNames module is able to return a Factbook::Country and a Fao::Country
+ object based on input GPS coordinates (lat/long).
+
+ >> @innsbruck = SemanticCrawler::GeoNames::Country.new(47.271338, 11.395333)
+ >> articles = @innsbruck.get_wikipedia_articles
+ >> articles.each do |article|
+ >>   puts article.wikipedia_url
+ >> end
+ >> factbook_obj = @innsbruck.get_factbook_country
+ >> fao_obj = @innsbruck.get_fao_country
+
  === Factbook
 
  Fetch Factbook information about Austria:
@@ -90,7 +107,7 @@ Country information from {FAO}[http://www.fao.org]:
 
  === LinkedGeoData
 
- Geo information from {LinkedGeoData}[http://linkedgeodata.org]
+ Geo information from {LinkedGeoData}[http://linkedgeodata.org]:
 
  >> # All nodes around the center of dresden, in a radius of 1000m
  >> @dresden = SemanticCrawler::LinkedGeoData::RelevantNodes.new(51.033333, 13.733333, 1000)
data/Rakefile CHANGED
@@ -21,7 +21,6 @@ RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.options << '--line-numbers'
  rdoc.options << '--main=README.rdoc'
  rdoc.rdoc_files.include('README.rdoc')
- rdoc.rdoc_files.include('CHANGELOG.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
  end
 
data/changelog.sh ADDED
@@ -0,0 +1,4 @@
+ #!/bin/bash
+ currentMerge=$(git log --merges master -1 --format=%h)
+ previousMerge=$(git log --merges master -2 --format=%h | tail -n1)
+ git log ${previousMerge}..${currentMerge}
@@ -0,0 +1,13 @@
+ require 'cgi'
+ require 'httparty'
+ require 'json'
+ require 'pp'
+
+ #query = [{'id' => '/en/austria', 'name' => nil, 'type' => '/location/country', '*' => [{}] }]
+ query = [{'id' => '/en/austria', 'type' => '/type/property' }]
+ query_envelope = {'query' => query }
+ service_url = 'http://api.freebase.com/api/service/mqlread'
+ url = service_url + '?query=' + CGI::escape(query_envelope.to_json)
+
+ response = HTTParty.get(url, :format => :json)
+ puts response
@@ -1,6 +1,7 @@
  # encoding: UTF-8
 
  require "httparty"
+ require "geonames"
 
  # The top level module contains the different data sources
  # as sub-modules. Currently there are the following modules
@@ -38,6 +39,7 @@ require "semantic_crawler/dbpedia"
 
  # GeoNames.org - module: GeoNames
  require "semantic_crawler/geo_names"
+ require "semantic_crawler/geo_names/country"
 
  # LinkedGeoData.org - module: LinkedGeoData
  require "semantic_crawler/linked_geo_data"
@@ -118,6 +118,7 @@ module SemanticCrawler
  end
  end
 
+ # @return [String] The document serialized as XML
  def xml_document
  @doc.to_s
  end
@@ -3,6 +3,7 @@ module SemanticCrawler
  # Represents Food and Agriculture information about one country.
  class Country
 
+ # The URI prefix of the fao country object
  @@URI_PREFIX = "http://www.fao.org/countryprofiles/geoinfo/geopolitical/data/"
 
  # Namespace hash
@@ -35,11 +36,13 @@ module SemanticCrawler
  end
 
  # The dbpedia identifier (from fao:codeDBPediaID)
+ # @return [String]
  def code_dbpedia_id
  query_root_node("fao:codeDBPediaID/text()", @@NAMESPACES).to_s
  end
 
  # Links to additional information (from owl:sameAs)
+ # @return [Array<String>]
  def same_as
  returnLinks = []
  links = query_root_node("owl:sameAs/@rdf:resource", @@NAMESPACES)
@@ -50,63 +53,75 @@ module SemanticCrawler
  end
 
  # The type as URL of this entity (from rdf:type)
+ # @return [String]
  def type_url
  query_root_node("rdf:type/@rdf:resource", @@NAMESPACES).to_s
  end
 
  # The maximum latitude (from fao:hasMaxLatitude)
+ # @return [String]
  def max_latitude
  query_root_node("fao:hasMaxLatitude/text()", @@NAMESPACES).to_s
  end
 
  # The maximum longitude (from fao:hasMaxLongitude)
+ # @return [String]
  def max_longitude
  query_root_node("fao:hasMaxLongitude/text()", @@NAMESPACES).to_s
  end
 
  # The minimum latitude (from fao:hasMinLatitude)
+ # @return [String]
  def min_latitude
  query_root_node("fao:hasMinLatitude/text()", @@NAMESPACES).to_s
  end
 
  # The minimum longitude (from fao:hasMinLongitude)
+ # @return [String]
  def min_longitude
  query_root_node("fao:hasMinLongitude/text()", @@NAMESPACES).to_s
  end
 
  # Human readable description about the land area (from fao:landAreaNotes)
+ # @return [String]
  def land_area_notes
  query_root_node("fao:landAreaNotes/text()", @@NAMESPACES).to_s
  end
 
  # Land area total value (from fao:landAreaTotal)
+ # @return [String]
  def land_area_total
  query_root_node("fao:landAreaTotal/text()", @@NAMESPACES).to_s
  end
 
  # Land area unit (from fao:landAreaUnit)
+ # @return [String]
  def land_area_unit
  query_root_node("fao:landAreaUnit/text()", @@NAMESPACES).to_s
  end
 
  # Land area year (from fao:landAreaYear)
+ # @return [String]
  def land_area_year
  query_root_node("fao:landAreaYear/text()", @@NAMESPACES).to_s
  end
 
  # The currency name.
- # @param [Lang] The language in which the currency name should be returned
- def name_currency(lang)
+ # @param [String] The language in which the currency name should be returned
+ # @return [String]
+ def name_currency(lang = 'en')
  query_root_node("fao:nameCurrency[@xml:lang='#{lang}']/text()", @@NAMESPACES).to_s
  end
 
  # The official country name
- # @param [Lang] The language in which the official name should be returned
- def official_name(lang)
+ # @param [String] The language in which the official name should be returned
+ # @return [String]
+ def official_name(lang = 'en')
  query_root_node("fao:nameOfficial[@xml:lang='#{lang}']/text()", @@NAMESPACES).to_s
  end
 
  # Classification of this country as name (from fao:isInGroup)
+ # @return [Array<String>]
  def is_in_group_name
  returnGroup = []
  group = query_root_node("fao:isInGroup/@rdf:resource", @@NAMESPACES)
@@ -120,6 +135,7 @@ module SemanticCrawler
 
  # Classification of this country as dereferenceable URL (from
  # fao:isInGroup)
+ # @return [Array<String>]
  def is_in_group_url
  returnGroup = []
  group = query_root_node("fao:isInGroup/@rdf:resource", @@NAMESPACES)
@@ -133,6 +149,7 @@ module SemanticCrawler
 
  # Returns all countries that share a boarder with this country (as
  # dereferencable URL - from fao:hasBorderWith)
+ # @return [Array<String>]
  def has_boarder_with_url
  returnGroup = []
  group = query_root_node("fao:hasBorderWith/@rdf:resource", @@NAMESPACES)
@@ -146,6 +163,7 @@ module SemanticCrawler
 
  # Returns all countries that share a boarder with this country (as
  # name)
+ # @return [Array<String>]
  def has_boarder_with_name
  returnGroup = []
  group = query_root_node("fao:hasBorderWith/@rdf:resource", @@NAMESPACES)
@@ -158,36 +176,43 @@ module SemanticCrawler
  end
 
  # Population notes (from fao:populationNotes)
+ # @return [String]
  def population_notes
  query_root_node("fao:populationNotes/text()", @@NAMESPACES).to_s
  end
 
  # Population total (from fao:populationTotal)
+ # @return [String]
  def population_total
  query_root_node("fao:populationTotal/text()", @@NAMESPACES).to_s
  end
 
  # Population unit (from fao:populationUnit)
+ # @return [String]
  def population_unit
  query_root_node("fao:populationUnit/text()", @@NAMESPACES).to_s
  end
 
  # Population year (from fao:populationYear)
+ # @return [String]
  def population_year
  query_root_node("fao:populationYear/text()", @@NAMESPACES).to_s
  end
 
  # Entity is valid since (from fao:validSince)
+ # @return [String]
  def valid_since
  query_root_node("fao:validSince/text()", @@NAMESPACES).to_s
  end
 
  # Entity is valid until (from fao:validUntil)
+ # @return [String]
  def valid_until
  query_root_node("fao:validUntil/text()", @@NAMESPACES).to_s
  end
 
  # Executes a xpath query with optional a hash with namespaces
+ # @return [String]
  def query_root_node(xpath_query, namespaces = {})
  if !@root_node.nil?
  @root_node.xpath(xpath_query, namespaces)
@@ -195,6 +220,7 @@ module SemanticCrawler
  end
 
  # Outputs the document as XML
+ # @return [String] The document serialized as XML
  def xml_document
  @root_node.to_s
  end
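
Note on the name_currency and official_name change above: the language parameter is now documented as a String and defaults to 'en'. A minimal usage sketch under that reading, reusing the Austria example from the README (returned values are illustrative, not verified against the FAO data):

  >> austria = SemanticCrawler::Fao::Country.new("Austria")
  >> austria.official_name        # same as official_name('en')
  >> austria.official_name('de')  # official name in German, if an xml:lang='de' literal exists
  >> austria.name_currency        # currency name, English by default
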
@@ -11,6 +11,7 @@ module SemanticCrawler
  # * Volcanoes
  class Feed
 
+ # XML namespaces used for the parsing process
  @@NAMESPACES = {
  "atom" => "http://www.w3.org/2005/Atom"
  }
@@ -0,0 +1,48 @@
+ module SemanticCrawler
+ module GeoNames
+ # Resolves country-level information from GPS coordinates via GeoNames.
+ class Country
+
+ # @attribute [r]
+ # The input latitude
+ attr_reader :latitude
+
+ # @attribute [r]
+ # The input longitude
+ attr_reader :longitude
+
+ def initialize(new_latitude, new_longitude)
+ @latitude = new_latitude
+ @longitude = new_longitude
+ end
+
+ # Returns an ISO 3166-1 alpha-2 country code
+ def get_country_code
+ Geonames::WebService.country_code @latitude, @longitude
+ end
+
+ # Returns the name of the country
+ def get_country_name
+ subdivision = Geonames::WebService.country_subdivision @latitude, @longitude
+ subdivision.country_name
+ end
+
+ # Returns a list of Wikipedia article objects, each containing a link to
+ # the article, a summary and many more fields.
+ def get_wikipedia_articles
+ Geonames::WebService.find_nearby_wikipedia :lat => @latitude, :long => @longitude
+ end
+
+ # @return [SemanticCrawler::Factbook::Country] A Factbook country object
+ def get_factbook_country
+ SemanticCrawler::Factbook::Country.new get_country_name
+ end
+
+ # @return [SemanticCrawler::Fao::Country] A Food and Agriculture country object
+ def get_fao_country
+ SemanticCrawler::Fao::Country.new get_country_name
+ end
+
+ end
+ end
+ end
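
The new GeoNames::Country class above is the entry point added in this release: coordinates go in, country-level objects from the other modules come out. A short sketch of how its methods chain together (the ISO code and country name shown are the expected values for these coordinates, and only methods defined in the class above are used):

  >> innsbruck = SemanticCrawler::GeoNames::Country.new(47.271338, 11.395333)
  >> innsbruck.get_country_code                 # => "AT" (ISO 3166-1 alpha-2)
  >> innsbruck.get_country_name                 # => "Austria"
  >> factbook = innsbruck.get_factbook_country  # SemanticCrawler::Factbook::Country
  >> fao = innsbruck.get_fao_country            # SemanticCrawler::Fao::Country
  >> fao.official_name                          # FAO official name, English by default
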
@@ -23,33 +23,39 @@ module SemanticCrawler
  end
 
  # geo:lat
+ # @return [String]
  def latitude
- query_root_node("geo:lat/text()", @@NAMESPACES)
+ query_root_node("geo:lat/text()", @@NAMESPACES).to_s
  end
 
  # geo:long
+ # @return [String]
  def longitude
- query_root_node("geo:long/text()", @@NAMESPACES)
+ query_root_node("geo:long/text()", @@NAMESPACES).to_s
  end
 
  # The type as URL
+ # @return [String]
  def type
- query_root_node("rdf:type/@rdf:resource", @@NAMESPACES)
+ query_root_node("rdf:type/@rdf:resource", @@NAMESPACES).to_s
  end
 
  # Comment about the entity
+ # @return [String]
  def note
- query_root_node("lgdp:note/text()", @@NAMESPACES)
+ query_root_node("lgdp:note/text()", @@NAMESPACES).to_s
  end
 
  # Data Source
+ # @return [String]
  def created_by
- query_root_node("lgdp:created_by/text()", @@NAMESPACES)
+ query_root_node("lgdp:created_by/text()", @@NAMESPACES).to_s
  end
 
  # User link
+ # @return [String]
  def contributor
- query_root_node("lgdo:contributor/@rdf:resource", @@NAMESPACES)
+ query_root_node("lgdo:contributor/@rdf:resource", @@NAMESPACES).to_s
  end
 
  # Link to way
@@ -58,33 +64,39 @@ module SemanticCrawler
  end
 
  # Street name
+ # @return [String]
  def has_street
- query_root_node("lgdo:hasStreet/text()", @@NAMESPACES)
+ query_root_node("lgdo:hasStreet/text()", @@NAMESPACES).to_s
  end
 
  # Postal code
+ # @return [String]
  def has_postal_code
- query_root_node("lgdo:hasPostalCode/text()", @@NAMESPACES)
+ query_root_node("lgdo:hasPostalCode/text()", @@NAMESPACES).to_s
  end
 
  # House number
+ # @return [String]
  def has_house_number
- query_root_node("lgdo:hasHouseNumber/text()", @@NAMESPACES)
+ query_root_node("lgdo:hasHouseNumber/text()", @@NAMESPACES).to_s
  end
 
  # Country in ISO 3166-1 alpha-2
+ # @return [String]
  def has_country
- query_root_node("lgdo:hasCountry/text()", @@NAMESPACES)
+ query_root_node("lgdo:hasCountry/text()", @@NAMESPACES).to_s
  end
 
  # City name
+ # @return [String]
  def has_city
- query_root_node("lgdo:hasCity/text()", @@NAMESPACES)
+ query_root_node("lgdo:hasCity/text()", @@NAMESPACES).to_s
  end
 
  # Is this place wheelchair friendly?
+ # @return [String]
  def wheelchair
- query_root_node("lgdo:wheelchair/@rdf:resource", @@NAMESPACES)
+ query_root_node("lgdo:wheelchair/@rdf:resource", @@NAMESPACES).to_s
  end
 
  # Query the root_node
@@ -95,6 +107,7 @@ module SemanticCrawler
  end
 
  # Outputs the document as XML
+ # @return [String] The document serialized as XML
  def xml_document
  @root_node.to_s
  end
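
The .to_s additions above mean that each LinkedGeoData node accessor now returns a plain String instead of the raw XPath result. A sketch of reading the new string values; iterating the fetched nodes through a relevant_nodes accessor is an assumption for illustration and is not shown in this diff:

  >> @dresden = SemanticCrawler::LinkedGeoData::RelevantNodes.new(51.033333, 13.733333, 1000)
  >> @dresden.relevant_nodes.each do |node|   # hypothetical accessor, see the RelevantNodes class
  >>   puts "#{node.has_street} #{node.has_house_number}, #{node.has_postal_code} #{node.has_city}"
  >>   puts node.latitude + " / " + node.longitude   # plain Strings after this change
  >> end
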