semantic-crawler 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /doc/
2
+ /rdoc/
3
+ *.swp
4
+ /.bundle/
5
+ /.yardoc
6
+ /log/*.log
7
+ /log/*.log.*
8
+ /pkg/
9
+ /test.sh
10
+ test/dummy/log/*.log
11
+ test/dummy/tmp/
12
+ test/dummy/.sass-cache
13
+ /coverage
14
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --profile
3
+ #--format documentation
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.3
data/.yardopts ADDED
@@ -0,0 +1,5 @@
1
+ --title "Semantic Crawler Library for Ruby"
2
+ --output-dir doc/yard
3
+ --markup rdoc
4
+ --readme README.rdoc
5
+ -
data/Gemfile ADDED
@@ -0,0 +1,18 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Declare your gem's dependencies in semantic_crawler.gemspec.
4
+ # Bundler will treat runtime dependencies like base dependencies, and
5
+ # development dependencies will be added by default to the :development group.
6
+ gemspec
7
+
8
+ # jquery-rails is used by the dummy application
9
+ gem "jquery-rails"
10
+
11
+ #
12
+ # Declare any dependencies that are still in development here instead of in
13
+ # your gemspec. These might include edge Rails or gems from your path or
14
+ # Git. Remember to move these dependencies to your gemspec before releasing
15
+ # your gem to rubygems.org.
16
+
17
+ # To use debugger
18
+ # gem 'ruby-debug'
data/README.rdoc CHANGED
@@ -1,4 +1,4 @@
1
- = SemanticCrawler {<img src="https://secure.travis-ci.org/obale/semantic_crawler.png?branch=master"/>}[http://travis-ci.org/#!/obale/semantic_crawler]
1
+ = SemanticCrawler
2
2
 
3
3
  This project encapsulates data gathering from different sources.
4
4
  It simplifies the extension of internal data with public available
@@ -8,15 +8,15 @@ to bypass complex NLP (natural language processing).
8
8
 
9
9
  == Supported Sources
10
10
 
11
- * CIA Factbook RDF Dump, see http://www4.wiwiss.fu-berlin.de/factbook/directory/countries
12
- * GDACS (see http://gdacs.org)
13
- * FAO - Food and Agriculture Organization of the United Nations (see http://www.fao.org)
14
- * LinkedGeoData - LGD (see http://linkedgeodata.org)
15
- * [Started] Freebase (see http://freebase.com)
11
+ * {Geonames}[http://www.geonames.org/]
12
+ * {CIA Factbook RDF Dump}[http://www4.wiwiss.fu-berlin.de/factbook/directory/countries]
13
+ * {FAO - Food and Agriculture Organization of the United Nations}[http://www.fao.org]
14
+ * {LinkedGeoData - LGD}[http://linkedgeodata.org]
15
+ * {GDACS}[http://gdacs.org]
16
+ * [Started] {Freebase}[http://freebase.com]
16
17
 
17
18
  === TODO
18
19
 
19
- * Geonames
20
20
  * DBPedia
21
21
  * Different Government Sources
22
22
 
@@ -45,8 +45,25 @@ Or from source:
45
45
 
46
46
  == Examples
47
47
 
48
+ These examples are only a short outline of how to use the library. For more
49
+ information read the documentation or look into the source code. To use the
50
+ library include or execute the following line:
51
+
48
52
  >> require "semantic_crawler"
49
53
 
54
+ === GeoNames
55
+
56
+ The GeoNames module is able to return Factbook::Country and Fao::Country
57
+ objects on the basis of input GPS coordinates (lat/long).
58
+
59
+ >> @innsbruck = SemanticCrawler::GeoNames::Country.new(47.271338, 11.395333)
60
+ >> articles = @innsbruck.get_wikipedia_articles
61
+ >> articles.each do |article|
62
+ >> puts article.wikipedia_url
63
+ >> end
64
+ >> factbook_obj = @innsbruck.get_factbook_country
65
+ >> fao_obj = @innsbruck.get_fao_country
66
+
50
67
  === Factbook
51
68
 
52
69
  Fetch Factbook information about Austria:
@@ -90,7 +107,7 @@ Country information from {FAO}[http://www.fao.org]:
90
107
 
91
108
  === LinkedGeoData
92
109
 
93
- Geo information from {LinkedGeoData}[http://linkedgeodata.org]
110
+ Geo information from {LinkedGeoData}[http://linkedgeodata.org]:
94
111
 
95
112
  >> # All nodes around the center of dresden, in a radius of 1000m
96
113
  >> @dresden = SemanticCrawler::LinkedGeoData::RelevantNodes.new(51.033333, 13.733333, 1000)
data/Rakefile CHANGED
@@ -21,7 +21,6 @@ RDoc::Task.new(:rdoc) do |rdoc|
21
21
  rdoc.options << '--line-numbers'
22
22
  rdoc.options << '--main=README.rdoc'
23
23
  rdoc.rdoc_files.include('README.rdoc')
24
- rdoc.rdoc_files.include('CHANGELOG.rdoc')
25
24
  rdoc.rdoc_files.include('lib/**/*.rb')
26
25
  end
27
26
 
data/changelog.sh ADDED
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+ currentMerge=$(git log --merges master -1 --format=%h)
3
+ previousMerge=$(git log --merges master -2 --format=%h | tail -n1)
4
+ git log ${previousMerge}..${currentMerge}
@@ -0,0 +1,13 @@
1
+ require 'cgi'
2
+ require 'httparty'
3
+ require 'json'
4
+ require 'pp'
5
+
6
+ #query = [{'id' => '/en/austria', 'name' => nil, 'type' => '/location/country', '*' => [{}] }]
7
+ query = [{'id' => '/en/austria', 'type' => '/type/property' }]
8
+ query_envelope = {'query' => query }
9
+ service_url = 'http://api.freebase.com/api/service/mqlread'
10
+ url = service_url + '?query=' + CGI::escape(query_envelope.to_json)
11
+
12
+ response = HTTParty.get(url, :format => :json)
13
+ puts response
@@ -1,6 +1,7 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  require "httparty"
4
+ require "geonames"
4
5
 
5
6
  # The top level module contains the different data sources
6
7
  # as sub-modules. Currently there are the following modules
@@ -38,6 +39,7 @@ require "semantic_crawler/dbpedia"
38
39
 
39
40
  # GeoNames.org - module: GeoNames
40
41
  require "semantic_crawler/geo_names"
42
+ require "semantic_crawler/geo_names/country"
41
43
 
42
44
  # LinkedGeoData.org - module: LinkedGeoData
43
45
  require "semantic_crawler/linked_geo_data"
@@ -118,6 +118,7 @@ module SemanticCrawler
118
118
  end
119
119
  end
120
120
 
121
+ # @return [String] The document serialized as XML
121
122
  def xml_document
122
123
  @doc.to_s
123
124
  end
@@ -3,6 +3,7 @@ module SemanticCrawler
3
3
  # Represents Food and Agriculture information about one country.
4
4
  class Country
5
5
 
6
+ # The URI prefix of the fao country object
6
7
  @@URI_PREFIX = "http://www.fao.org/countryprofiles/geoinfo/geopolitical/data/"
7
8
 
8
9
  # Namespace hash
@@ -35,11 +36,13 @@ module SemanticCrawler
35
36
  end
36
37
 
37
38
  # The dbpedia identifier (from fao:codeDBPediaID)
39
+ # @return [String]
38
40
  def code_dbpedia_id
39
41
  query_root_node("fao:codeDBPediaID/text()", @@NAMESPACES).to_s
40
42
  end
41
43
 
42
44
  # Links to additional information (from owl:sameAs)
45
+ # @return [Array<String>]
43
46
  def same_as
44
47
  returnLinks = []
45
48
  links = query_root_node("owl:sameAs/@rdf:resource", @@NAMESPACES)
@@ -50,63 +53,75 @@ module SemanticCrawler
50
53
  end
51
54
 
52
55
  # The type as URL of this entity (from rdf:type)
56
+ # @return [String]
53
57
  def type_url
54
58
  query_root_node("rdf:type/@rdf:resource", @@NAMESPACES).to_s
55
59
  end
56
60
 
57
61
  # The maximum latitude (from fao:hasMaxLatitude)
62
+ # @return [String]
58
63
  def max_latitude
59
64
  query_root_node("fao:hasMaxLatitude/text()", @@NAMESPACES).to_s
60
65
  end
61
66
 
62
67
  # The maximum longitude (from fao:hasMaxLongitude)
68
+ # @return [String]
63
69
  def max_longitude
64
70
  query_root_node("fao:hasMaxLongitude/text()", @@NAMESPACES).to_s
65
71
  end
66
72
 
67
73
  # The minimum latitude (from fao:hasMinLatitude)
74
+ # @return [String]
68
75
  def min_latitude
69
76
  query_root_node("fao:hasMinLatitude/text()", @@NAMESPACES).to_s
70
77
  end
71
78
 
72
79
  # The minimum longitude (from fao:hasMinLongitude)
80
+ # @return [String]
73
81
  def min_longitude
74
82
  query_root_node("fao:hasMinLongitude/text()", @@NAMESPACES).to_s
75
83
  end
76
84
 
77
85
  # Human readable description about the land area (from fao:landAreaNotes)
86
+ # @return [String]
78
87
  def land_area_notes
79
88
  query_root_node("fao:landAreaNotes/text()", @@NAMESPACES).to_s
80
89
  end
81
90
 
82
91
  # Land area total value (from fao:landAreaTotal)
92
+ # @return [String]
83
93
  def land_area_total
84
94
  query_root_node("fao:landAreaTotal/text()", @@NAMESPACES).to_s
85
95
  end
86
96
 
87
97
  # Land area unit (from fao:landAreaUnit)
98
+ # @return [String]
88
99
  def land_area_unit
89
100
  query_root_node("fao:landAreaUnit/text()", @@NAMESPACES).to_s
90
101
  end
91
102
 
92
103
  # Land area year (from fao:landAreaYear)
104
+ # @return [String]
93
105
  def land_area_year
94
106
  query_root_node("fao:landAreaYear/text()", @@NAMESPACES).to_s
95
107
  end
96
108
 
97
109
  # The currency name.
98
- # @param [Lang] The language in which the currency name should be returned
99
- def name_currency(lang)
110
+ # @param [String] The language in which the currency name should be returned
111
+ # @return [String]
112
+ def name_currency(lang = 'en')
100
113
  query_root_node("fao:nameCurrency[@xml:lang='#{lang}']/text()", @@NAMESPACES).to_s
101
114
  end
102
115
 
103
116
  # The official country name
104
- # @param [Lang] The language in which the official name should be returned
105
- def official_name(lang)
117
+ # @param [String] The language in which the official name should be returned
118
+ # @return [String]
119
+ def official_name(lang = 'en')
106
120
  query_root_node("fao:nameOfficial[@xml:lang='#{lang}']/text()", @@NAMESPACES).to_s
107
121
  end
108
122
 
109
123
  # Classification of this country as name (from fao:isInGroup)
124
+ # @return [Array<String>]
110
125
  def is_in_group_name
111
126
  returnGroup = []
112
127
  group = query_root_node("fao:isInGroup/@rdf:resource", @@NAMESPACES)
@@ -120,6 +135,7 @@ module SemanticCrawler
120
135
 
121
136
  # Classification of this country as dereferenceable URL (from
122
137
  # fao:isInGroup)
138
+ # @return [Array<String>]
123
139
  def is_in_group_url
124
140
  returnGroup = []
125
141
  group = query_root_node("fao:isInGroup/@rdf:resource", @@NAMESPACES)
@@ -133,6 +149,7 @@ module SemanticCrawler
133
149
 
134
150
 # Returns all countries that share a border with this country (as
135
151
 # dereferenceable URL - from fao:hasBorderWith)
152
+ # @return [Array<String>]
136
153
  def has_boarder_with_url
137
154
  returnGroup = []
138
155
  group = query_root_node("fao:hasBorderWith/@rdf:resource", @@NAMESPACES)
@@ -146,6 +163,7 @@ module SemanticCrawler
146
163
 
147
164
 # Returns all countries that share a border with this country (as
148
165
  # name)
166
+ # @return [Array<String>]
149
167
  def has_boarder_with_name
150
168
  returnGroup = []
151
169
  group = query_root_node("fao:hasBorderWith/@rdf:resource", @@NAMESPACES)
@@ -158,36 +176,43 @@ module SemanticCrawler
158
176
  end
159
177
 
160
178
  # Population notes (from fao:populationNotes)
179
+ # @return [String]
161
180
  def population_notes
162
181
  query_root_node("fao:populationNotes/text()", @@NAMESPACES).to_s
163
182
  end
164
183
 
165
184
  # Population total (from fao:populationTotal)
185
+ # @return [String]
166
186
  def population_total
167
187
  query_root_node("fao:populationTotal/text()", @@NAMESPACES).to_s
168
188
  end
169
189
 
170
190
  # Population unit (from fao:populationUnit)
191
+ # @return [String]
171
192
  def population_unit
172
193
  query_root_node("fao:populationUnit/text()", @@NAMESPACES).to_s
173
194
  end
174
195
 
175
196
  # Population year (from fao:populationYear)
197
+ # @return [String]
176
198
  def population_year
177
199
  query_root_node("fao:populationYear/text()", @@NAMESPACES).to_s
178
200
  end
179
201
 
180
202
  # Entity is valid since (from fao:validSince)
203
+ # @return [String]
181
204
  def valid_since
182
205
  query_root_node("fao:validSince/text()", @@NAMESPACES).to_s
183
206
  end
184
207
 
185
208
  # Entity is valid until (from fao:validUntil)
209
+ # @return [String]
186
210
  def valid_until
187
211
  query_root_node("fao:validUntil/text()", @@NAMESPACES).to_s
188
212
  end
189
213
 
190
214
  # Executes a xpath query with optional a hash with namespaces
215
+ # @return [String]
191
216
  def query_root_node(xpath_query, namespaces = {})
192
217
  if !@root_node.nil?
193
218
  @root_node.xpath(xpath_query, namespaces)
@@ -195,6 +220,7 @@ module SemanticCrawler
195
220
  end
196
221
 
197
222
  # Outputs the document as XML
223
+ # @return [String] The document serialized as XML
198
224
  def xml_document
199
225
  @root_node.to_s
200
226
  end
@@ -11,6 +11,7 @@ module SemanticCrawler
11
11
  # * Volcanoes
12
12
  class Feed
13
13
 
14
+ # XML namespaces used for the parsing process
14
15
  @@NAMESPACES = {
15
16
  "atom" => "http://www.w3.org/2005/Atom"
16
17
  }
@@ -0,0 +1,48 @@
1
+ module SemanticCrawler
2
+ module GeoNames
3
+ # Resolves country information from GPS coordinates via the GeoNames web service.
4
+ class Country
5
+
6
+ # @attribute [r]
7
+ # The input latitude
8
+ attr_reader :latitude
9
+
10
+ # @attribute [r]
11
+ # The input longitude
12
+ attr_reader :longitude
13
+
14
+ def initialize(new_latitude, new_longitude)
15
+ @latitude = new_latitude
16
+ @longitude = new_longitude
17
+ end
18
+
19
+ # Returns an ISO 3166-1 alpha-2 country code
20
+ def get_country_code
21
+ Geonames::WebService.country_code @latitude, @longitude
22
+ end
23
+
24
+ # Returns the name of the country
25
+ def get_country_name
26
+ subdivision = Geonames::WebService.country_subdivision @latitude, @longitude
27
+ subdivision.country_name
28
+ end
29
+
30
+ # Returns a Wikipedia object that contains a link to the article,
31
+ # summary and many more fields.
32
+ def get_wikipedia_articles
33
+ Geonames::WebService.find_nearby_wikipedia :lat => @latitude, :long => @longitude
34
+ end
35
+
36
+ # @return [SemanticCrawler::Factbook::Country] A Factbook country object
37
+ def get_factbook_country
38
+ SemanticCrawler::Factbook::Country.new get_country_name
39
+ end
40
+
41
+ # @return [SemanticCrawler::Fao::Country] A Food and Agriculture country object
42
+ def get_fao_country
43
+ SemanticCrawler::Fao::Country.new get_country_name
44
+ end
45
+
46
+ end
47
+ end
48
+ end
@@ -23,33 +23,39 @@ module SemanticCrawler
23
23
  end
24
24
 
25
25
  # geo:lat
26
+ # @return [String]
26
27
  def latitude
27
- query_root_node("geo:lat/text()", @@NAMESPACES)
28
+ query_root_node("geo:lat/text()", @@NAMESPACES).to_s
28
29
  end
29
30
 
30
31
  # geo:long
32
+ # @return [String]
31
33
  def longitude
32
- query_root_node("geo:long/text()", @@NAMESPACES)
34
+ query_root_node("geo:long/text()", @@NAMESPACES).to_s
33
35
  end
34
36
 
35
37
  # The type as URL
38
+ # @return [String]
36
39
  def type
37
- query_root_node("rdf:type/@rdf:resource", @@NAMESPACES)
40
+ query_root_node("rdf:type/@rdf:resource", @@NAMESPACES).to_s
38
41
  end
39
42
 
40
43
  # Comment about the entity
44
+ # @return [String]
41
45
  def note
42
- query_root_node("lgdp:note/text()", @@NAMESPACES)
46
+ query_root_node("lgdp:note/text()", @@NAMESPACES).to_s
43
47
  end
44
48
 
45
49
  # Data Source
50
+ # @return [String]
46
51
  def created_by
47
- query_root_node("lgdp:created_by/text()", @@NAMESPACES)
52
+ query_root_node("lgdp:created_by/text()", @@NAMESPACES).to_s
48
53
  end
49
54
 
50
55
  # User link
56
+ # @return [String]
51
57
  def contributor
52
- query_root_node("lgdo:contributor/@rdf:resource", @@NAMESPACES)
58
+ query_root_node("lgdo:contributor/@rdf:resource", @@NAMESPACES).to_s
53
59
  end
54
60
 
55
61
  # Link to way
@@ -58,33 +64,39 @@ module SemanticCrawler
58
64
  end
59
65
 
60
66
  # Street name
67
+ # @return [String]
61
68
  def has_street
62
- query_root_node("lgdo:hasStreet/text()", @@NAMESPACES)
69
+ query_root_node("lgdo:hasStreet/text()", @@NAMESPACES).to_s
63
70
  end
64
71
 
65
72
  # Postal code
73
+ # @return [String]
66
74
  def has_postal_code
67
- query_root_node("lgdo:hasPostalCode/text()", @@NAMESPACES)
75
+ query_root_node("lgdo:hasPostalCode/text()", @@NAMESPACES).to_s
68
76
  end
69
77
 
70
78
  # House number
79
+ # @return [String]
71
80
  def has_house_number
72
- query_root_node("lgdo:hasHouseNumber/text()", @@NAMESPACES)
81
+ query_root_node("lgdo:hasHouseNumber/text()", @@NAMESPACES).to_s
73
82
  end
74
83
 
75
84
  # Country in ISO 3166-1 alpha-2
85
+ # @return [String]
76
86
  def has_country
77
- query_root_node("lgdo:hasCountry/text()", @@NAMESPACES)
87
+ query_root_node("lgdo:hasCountry/text()", @@NAMESPACES).to_s
78
88
  end
79
89
 
80
90
  # City name
91
+ # @return [String]
81
92
  def has_city
82
- query_root_node("lgdo:hasCity/text()", @@NAMESPACES)
93
+ query_root_node("lgdo:hasCity/text()", @@NAMESPACES).to_s
83
94
  end
84
95
 
85
96
  # Is this place wheelchair friendly?
97
+ # @return [String]
86
98
  def wheelchair
87
- query_root_node("lgdo:wheelchair/@rdf:resource", @@NAMESPACES)
99
+ query_root_node("lgdo:wheelchair/@rdf:resource", @@NAMESPACES).to_s
88
100
  end
89
101
 
90
102
  # Query the root_node
@@ -95,6 +107,7 @@ module SemanticCrawler
95
107
  end
96
108
 
97
109
  # Outputs the document as XML
110
+ # @return [String] The document serialized as XML
98
111
  def xml_document
99
112
  @root_node.to_s
100
113
  end