data_kitten 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NDc5ZTUwMDA5OGJjMjg3YzI4MThmZWYzNTQ1NTkyNTNlN2JhZWU5Nw==
4
+ MTU4NmQ1MmU2YjJhY2U2NmVjOWE5NTBhOGM2YjNhNGQzMWIxYTU4MQ==
5
5
  data.tar.gz: !binary |-
6
- ZTkzNGQxYWIwOWU5NjliZThmZjE0Y2FkMWE1NmFhYTkwMTY2MGJhOQ==
6
+ ZjNhNWU4NzNlZjI4ODU3ZTRkYTgxZmY4MWM0NTA3OTQyNDNmMmJjMg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MGM1NjQ2ZDUzYzJiYjY5NzY1OTk1YzI0ZDZhZDQ3ZGIwZjdhNDVmYzRjNzM4
10
- ODc2MTUyYzFiOGNmNmNjNzExZjFmMTllZmFiNDMyZThiY2VmYjEyYThkZWMz
11
- ZmU2NjUyMDQ2MjVkYTNlYmNhNDBhOGYzYzE2Nzk3NjFmYTAyMTY=
9
+ ZjY0ODMyZmZjODMxNmU4NmNhMmI4N2YxMWMzMThhNGFlMTAxZmQ0ODA1OTcz
10
+ MjcwOWIwMmYyMmRiNDlmNTEyN2ExMWE4N2E4NWJlMDFlNjI3NDU4ZmZjZWFj
11
+ Yjk2MWZiZGJmODE5ZDVjMWQ0OTY2ODI4MWRiMzVlZmE5NjM0OGM=
12
12
  data.tar.gz: !binary |-
13
- ZWJhOTU2YTNkMmY3MzhiZjllNTY4YmY1MDFjNGZlZmY1MjIwODQ4ZDA5OGIz
14
- N2YwMjk4YzM5ZWUyM2VhODJjZjgzM2QzY2JjYjA5ZDFhMTUzM2U0MGU4ZDYz
15
- NjUxN2ExYTg0MTJlZTJkN2U2MTc1ZDZjMjBjOWQ5NzU3MWI1MGY=
13
+ MjY5NDZiZjliNThjZTk5NTQ4YzZlM2M4OGFiOGYzYjFjZDhmYzQ4NmRlMjJm
14
+ ZmE4MTMxZTMyZmQzNjBhODEyODZhODA3ZjIyMDUzNDFmMWJiMWRhNTBlMTU3
15
+ MTM2MTU4ZGY4YmZlMjcyM2VkYmM5Y2Q3NzY3YWJmNmNlODdlZTI=
@@ -49,7 +49,11 @@ module DataKitten
49
49
  def url
50
50
  @access_url.to_s
51
51
  end
52
-
52
+
53
+ def source
54
+ @access_url.as_json if @access_url.ok?
55
+ end
56
+
53
57
  # Can metadata be loaded for this Dataset?
54
58
  #
55
59
  # @return [Boolean] true if metadata can be loaded, false if it's
@@ -75,7 +79,15 @@ module DataKitten
75
79
  def host
76
80
  nil
77
81
  end
78
-
82
+
83
+ # A unique identifier of the dataset.
84
+ #
85
+ # @return [String] the identifier of the dataset
86
+ #
87
+ def identifier
88
+ nil
89
+ end
90
+
79
91
  # The human-readable title of the dataset.
80
92
  #
81
93
  # @return [String] the title of the dataset.
@@ -126,6 +138,13 @@ module DataKitten
126
138
  def modified
127
139
  nil
128
140
  end
141
+
142
+ # A web page that can be used to gain access to the dataset, its distributions and/or additional information.
143
+ #
144
+ # @return [String] The URL to the dataset
145
+ def landing_page
146
+ nil
147
+ end
129
148
 
130
149
  # The temporal coverage of the dataset
131
150
  #
@@ -191,7 +210,21 @@ module DataKitten
191
210
  def contributors
192
211
  []
193
212
  end
194
-
213
+
214
+ # The language of the dataset.
215
+ #
216
+ # @return [String] the language of the dataset
217
+ def language
218
+ nil
219
+ end
220
+
221
+ # The main category the dataset belongs to.
222
+ #
223
+ # @return [String] the main category (theme) of the dataset
224
+ def theme
225
+ nil
226
+ end
227
+
195
228
  # Has the data been crowdsourced?
196
229
  #
197
230
  # @return [Boolean] Whether the data has been crowdsourced or not.
@@ -229,5 +262,12 @@ module DataKitten
229
262
  []
230
263
  end
231
264
 
265
+ # Spatial coverage of the dataset
266
+ #
267
+ # @return [GeoJSON Geometry] A GeoJSON geometry object of the spatial coverage
268
+ def spatial
269
+ nil
270
+ end
271
+
232
272
  end
233
273
  end
@@ -14,8 +14,11 @@ module DataKitten
14
14
  # @!attribute access_url
15
15
  # @return [String] a URL to access the distribution.
16
16
  attr_accessor :access_url
17
- alias_method :uri, :access_url
18
- alias_method :download_url, :access_url
17
+
18
+ # @!attribute download_url
19
+ # @return [String] a URL to the file of the distribution.
20
+ attr_accessor :download_url
21
+ alias_method :uri, :download_url
19
22
 
20
23
  # @!attribute path
21
24
  # @return [String] the path of the distribution within the source, if appropriate
@@ -29,6 +32,22 @@ module DataKitten
29
32
  # @return [String] a textual description
30
33
  attr_accessor :description
31
34
 
35
+ # @!attribute issued
36
+ # @return [Date] date created
37
+ attr_accessor :issued
38
+
39
+ # @!attribute modified
40
+ # @return [Date] date modified
41
+ attr_accessor :modified
42
+
43
+ # @!attribute byte_size
44
+ # @return [Integer] size of file in bytes
45
+ attr_accessor :byte_size
46
+
47
+ # @!attribute media_type
48
+ # @return [String] the IANA media type (MIME type) of the distribution
49
+ attr_accessor :media_type
50
+
32
51
  # @!attribute schema
33
52
  # @return [Hash] a hash representing the schema of the data within the distribution. Will
34
53
  # change to a more structured object later.
@@ -65,7 +84,7 @@ module DataKitten
65
84
  @schema = r['schema']
66
85
  # Get path
67
86
  @path = r['path']
68
- @access_url = r['url']
87
+ @download_url = r['url']
69
88
  # Set title
70
89
  @title = @path || @uri
71
90
  elsif r = options[:dcat_resource]
@@ -73,13 +92,18 @@ module DataKitten
73
92
  @description = r[:title]
74
93
  @access_url = r[:accessURL]
75
94
  elsif r = options[:ckan_resource]
76
- @title = r[:title]
77
- @description = r[:title]
78
- @access_url = r[:accessURL]
79
- @extension = r[:format]
95
+ @title = r[:title]
96
+ @description = r[:title]
97
+ @issued = r[:issued]
98
+ @modified = r[:modified]
99
+ @access_url = r[:accessURL]
100
+ @download_url = r[:downloadURL]
101
+ @byte_size = r[:byteSize]
102
+ @media_type = r[:mediaType]
103
+ @extension = r[:format]
80
104
  # Load HTTP Response for further use
81
105
  @format = r[:format] ? DistributionFormat.new(self) : nil
82
- end
106
+ end
83
107
  # Set default CSV dialect
84
108
  @dialect ||= {
85
109
  "delimiter" => ","
@@ -112,7 +136,7 @@ module DataKitten
112
136
  #
113
137
  # @return [Boolean] whether the HTTP response returns a success code or not
114
138
  def exists?
115
- if @access_url
139
+ if @download_url
116
140
  http_head.response_code != 404
117
141
  end
118
142
  end
@@ -124,8 +148,8 @@ module DataKitten
124
148
  @data ||= begin
125
149
  if @path
126
150
  datafile = @dataset.send(:load_file, @path)
127
- elsif @access_url
128
- datafile = RestClient.get @access_url rescue nil
151
+ elsif @download_url
152
+ datafile = RestClient.get @download_url rescue nil
129
153
  end
130
154
  if datafile
131
155
  case format.extension
@@ -147,9 +171,9 @@ module DataKitten
147
171
  end
148
172
 
149
173
  def http_head
150
- if @access_url
174
+ if @download_url
151
175
  @http_head ||= begin
152
- Curl::Easy.http_head(@access_url) do |c|
176
+ Curl::Easy.http_head(@download_url) do |c|
153
177
  c.follow_location = true
154
178
  c.useragent = "curb"
155
179
  end
@@ -21,6 +21,7 @@ module DataKitten
21
21
  @@formats ||= {
22
22
  csv: { structured: true, open: true },
23
23
  xls: { structured: true, open: false },
24
+ xlsx: { structured: true, open: true },
24
25
  rdf: { structured: true, open: true },
25
26
  xml: { structured: true, open: true },
26
27
  wms: { structured: true, open: true },
@@ -33,6 +34,7 @@ module DataKitten
33
34
  sparql: { structured: true, open: true },
34
35
  kml: { structured: true, open: true },
35
36
  georss: { structured: true, open: true },
37
+ geojson: { structured: true, open: true },
36
38
  shp: { structured: true, open: true },
37
39
  html: { structured: false, open: true },
38
40
  doc: { structured: false, open: false },
@@ -3,6 +3,23 @@ module DataKitten
3
3
  # A license for a {Dataset} or {Distribution}
4
4
  #
5
5
  class License
6
+
7
# Patterns for well-known license URIs, mapped to the short abbreviation
# of the license they identify.
#
# Entries are kept in insertion order and the first pattern that matches
# wins, so the order below is significant. Dots in host names are escaped
# so that only the literal host matches (an unescaped "." would accept any
# character, e.g. "creativecommons-org"). The table is frozen because it is
# shared, read-only lookup data.
LICENSES = {
  %r{opendatacommons\.org.*/by(/|$)} => "odc-by",
  %r{opendatacommons\.org.*/odbl(/|$)} => "odc-odbl",
  %r{opendatacommons\.org.*/pddl(/|$)} => "odc-pddl",
  %r{opendefinition\.org.*/odc-by(/|$)} => "odc-by",
  %r{opendefinition\.org.*/odc-pddl(/|$)} => "odc-pddl",
  %r{opendefinition\.org.*/cc-zero(/|$)} => "cc-zero",
  %r{opendefinition\.org.*/cc-by(/|$)} => "cc-by",
  %r{opendefinition\.org.*/cc-by-sa(/|$)} => "cc-by-sa",
  %r{opendefinition\.org.*/gfdl(/|$)} => "gfdl",
  %r{creativecommons\.org.*/zero(/|$)} => "cc-zero",
  %r{creativecommons\.org.*/by-sa(/|$)} => "cc-by-sa",
  %r{creativecommons\.org.*/by(/|$)} => "cc-by",
  %r{(data|nationalarchives)\.gov\.uk.*/open-government-licence(/|$)} => "ogl-uk",
  %r{usa\.gov/publicdomain(/|$)} => "us-pd"
}.freeze
6
23
 
7
24
  # @!attribute id
8
25
  # @return [String] a short ID that identifies the license.
@@ -19,6 +36,10 @@ module DataKitten
19
36
  # @!attribute type
20
37
  # @return [String] the type of information this license applies to. Could be +:data+ or +:content+.
21
38
  attr_accessor :type
39
+
40
+ # @!attribute abbr
41
+ # @return [String] the license abbreviation
42
+ attr_accessor :abbr
22
43
 
23
44
  # Create a new License object.
24
45
  #
@@ -32,8 +53,14 @@ module DataKitten
32
53
  @name = options[:name]
33
54
  @uri = options[:uri]
34
55
  @type = options[:type]
56
+ @abbr = get_license_abbr(@uri) if @uri
57
+ end
58
+
59
+ def get_license_abbr(uri)
60
+ license = LICENSES.find { |regex, abbr| uri =~ regex }
61
+ license.last if license
35
62
  end
36
63
 
37
64
  end
38
65
 
39
- end
66
+ end
@@ -1,3 +1,5 @@
1
+ require 'data_kitten/utils/guessable_lookup.rb'
2
+
1
3
  module DataKitten
2
4
 
3
5
  module PublishingFormats
@@ -26,6 +28,7 @@ module DataKitten
26
28
  @@id = result["result"]["id"] rescue result["id"]
27
29
  end
28
30
  @@metadata = JSON.parse RestClient.get "#{uri.scheme}://#{uri.host}/api/rest/package/#{@@id}"
31
+ @@metadata.extend(GuessableLookup)
29
32
  rescue
30
33
  false
31
34
  end
@@ -43,14 +46,32 @@ module DataKitten
43
46
  #
44
47
  # @see Dataset#data_title
45
48
  def data_title
46
- metadata["title"] rescue nil
49
+ metadata.lookup("title")
47
50
  end
48
51
 
49
52
  # A brief description of the dataset
50
53
  #
51
54
  # @see Dataset#description
52
55
  def description
53
- metadata["notes"] rescue nil
56
+ metadata.lookup("notes") || metadata.lookup("description")
57
+ rescue
58
+ nil
59
+ end
60
+
61
+ # An identifier for the dataset
62
+ #
63
+ # @see Dataset#identifier
64
+ def identifier
65
+ metadata.lookup("name") || @@id
66
+ end
67
+
68
+ # A web page which can be used to gain access to the dataset
69
+ #
70
+ # @see Dataset#landing_page
71
+ def landing_page
72
+ metadata.lookup("extras", "landing_page") ||
73
+ metadata.lookup("url") ||
74
+ metadata.lookup("ckan_url")
54
75
  end
55
76
 
56
77
  # Keywords for the dataset
@@ -58,7 +79,7 @@ module DataKitten
58
79
  # @see Dataset#keywords
59
80
  def keywords
60
81
  keywords = []
61
- metadata["tags"].each do |tag|
82
+ metadata.lookup("tags").each do |tag|
62
83
  keywords << tag
63
84
  end
64
85
  return keywords
@@ -70,7 +91,7 @@ module DataKitten
70
91
  #
71
92
  # @see Dataset#publishers
72
93
  def publishers
73
- id = metadata['organization']['id'] || metadata['groups'][0]
94
+ id = metadata.lookup('organization', 'id') || metadata.lookup('groups', 0)
74
95
  fetch_publisher(id)
75
96
  rescue
76
97
  []
@@ -88,10 +109,9 @@ module DataKitten
88
109
  #
89
110
  # @see Dataset#licenses
90
111
  def licenses
91
- extras = metadata["extras"] || {}
92
- id = metadata["license_id"]
93
- uri = metadata["license_url"] || extras["licence_url"]
94
- name = metadata["license_title"] || extras["licence_url_title"]
112
+ id = metadata.lookup("license_id")
113
+ uri = metadata.lookup("license_url") || metadata.lookup("extras", "licence_url")
114
+ name = metadata.lookup("license_title") || metadata.lookup("extras", "licence_url_title")
95
115
  if [id, uri, name].any?
96
116
  [License.new(:id => id, :uri => uri, :name => name)]
97
117
  else
@@ -104,12 +124,17 @@ module DataKitten
104
124
  # @see Dataset#distributions
105
125
  def distributions
106
126
  distributions = []
107
- metadata["resources"].each do |resource|
127
+ metadata.lookup("resources").each do |resource|
108
128
  distribution = {
109
129
  :title => resource["description"],
110
- :accessURL => resource["url"],
111
- :format => resource["format"]
130
+ :accessURL => landing_page,
131
+ :downloadURL => resource["url"],
132
+ :format => resource["format"],
133
+ :mediaType => resource["mimetype"] || resource["content_type"],
112
134
  }
135
+ distribution[:issued] = Date.parse(resource["created"]) rescue nil
136
+ distribution[:modified] = Date.parse(resource["last_modified"] || resource["revision_timestamp"]) rescue nil
137
+ distribution[:byteSize] = Integer(resource["size"]) rescue nil
113
138
  distributions << Distribution.new(self, ckan_resource: distribution)
114
139
  end
115
140
  return distributions
@@ -121,32 +146,68 @@ module DataKitten
121
146
  #
122
147
  # @see Dataset#update_frequency
123
148
  def update_frequency
124
- metadata["extras"]["update_frequency"] || metadata["extras"]["frequency-of-update"] rescue nil
149
+ metadata.lookup("extras", "update_frequency") ||
150
+ metadata.lookup("extras", "frequency-of-update") ||
151
+ metadata.lookup("extras", "accrual_periodicity")
152
+ rescue
153
+ nil
125
154
  end
126
155
 
127
156
  # Date the dataset was released
128
157
  #
129
158
  # @see Dataset#issued
130
159
  def issued
131
- Date.parse metadata["metadata_created"] rescue nil
160
+ Date.parse metadata.lookup("metadata_created") rescue nil
132
161
  end
133
162
 
134
163
  # Date the dataset was modified
135
164
  #
136
165
  # @see Dataset#modified
137
166
  def modified
138
- Date.parse metadata["metadata_modified"] rescue nil
167
+ Date.parse metadata.lookup("metadata_modified") rescue nil
139
168
  end
140
169
 
141
170
  # The temporal coverage of the dataset
142
171
  #
143
172
  # @see Dataset#temporal
144
173
  def temporal
145
- start_date = Date.parse metadata["extras"]["temporal_coverage-from"] rescue nil
146
- end_date = Date.parse metadata["extras"]["temporal_coverage-to"] rescue nil
174
+ from = metadata.lookup("extras", "temporal_coverage-from") ||
175
+ metadata.lookup("extras", "temporal-extent-begin")
176
+ to = metadata.lookup("extras", "temporal_coverage-to") ||
177
+ metadata.lookup("extras", "temporal-extent-end")
178
+ start_date = Date.parse from rescue nil
179
+ end_date = Date.parse to rescue nil
147
180
  Temporal.new(:start => start_date, :end => end_date)
148
181
  end
149
182
 
183
+ # The language of the dataset
184
+ #
185
+ # @see Dataset#language
186
+ def language
187
+ metadata.lookup("language") ||
188
+ metadata.lookup("metadata_language") ||
189
+ metadata.lookup("extras", "metadata_language") ||
190
+ metadata.lookup("extras", "language", 0) ||
191
+ metadata.lookup("extras", "language")
192
+ end
193
+
194
+ # The main category of the dataset
195
+ #
196
+ # @see Dataset#theme
197
+ def theme
198
+ metadata.lookup("extras", "theme", 0) ||
199
+ metadata.lookup("extras", "theme-primary") ||
200
+ metadata.lookup("groups", 0, "name") ||
201
+ metadata.lookup("groups", 0)
202
+ end
203
+
204
+ # Spatial coverage of the dataset
205
+ #
206
+ # @see Dataset#spatial
207
+ def spatial
208
+ extract_spatial || extract_bbox
209
+ end
210
+
150
211
  private
151
212
 
152
213
  def metadata
@@ -161,17 +222,45 @@ module DataKitten
161
222
  extra
162
223
  end
163
224
 
225
+ def extract_spatial
226
+ geometry = JSON.parse metadata.lookup("extras", "spatial")
227
+ return geometry if !geometry["type"].nil?
228
+ rescue
229
+ nil
230
+ end
231
+
232
+ def extract_bbox
233
+ west = Float(metadata.lookup("extras", "bbox-west-long"))
234
+ east = Float(metadata.lookup("extras", "bbox-east-long"))
235
+ north = Float(metadata.lookup("extras", "bbox-north-lat"))
236
+ south = Float(metadata.lookup("extras", "bbox-south-lat"))
237
+
238
+ { "type" => "Polygon", "coordinates" => [
239
+ [
240
+ [west, north],
241
+ [east, north],
242
+ [east, south],
243
+ [west, south],
244
+ [west, north]
245
+ ]
246
+ ] }
247
+ rescue
248
+ nil
249
+ end
250
+
164
251
  def fetch_publisher(id)
165
252
  uri = parsed_uri
166
253
  [
167
- "#{uri.scheme}://#{uri.host}/api/rest/group/#{id}",
254
+ "#{uri.scheme}://#{uri.host}/api/3/action/organization_show?id=#{id}",
168
255
  "#{uri.scheme}://#{uri.host}/api/3/action/group_show?id=#{id}",
169
- "#{uri.scheme}://#{uri.host}/api/3/action/organization_show?id=#{id}"
256
+ "#{uri.scheme}://#{uri.host}/api/rest/group/#{id}"
170
257
  ].each do |uri|
171
258
  begin
172
259
  @group = JSON.parse RestClient.get uri
173
260
  break
174
- rescue RestClient::ResourceNotFound
261
+ rescue
262
+ # FakeWeb raises FakeWeb::NetConnectNotAllowedError, whereas
263
+ # RestClient raises RestClient::ResourceNotFound in the "real world".
175
264
  nil
176
265
  end
177
266
  end
@@ -190,8 +279,8 @@ module DataKitten
190
279
  end
191
280
 
192
281
  def extract_agent(name_field, email_field)
193
- name = metadata[name_field]
194
- email = metadata[email_field]
282
+ name = metadata.lookup(name_field)
283
+ email = metadata.lookup(email_field)
195
284
  if [name, email].any?
196
285
  [Agent.new(name: name, mbox: email)]
197
286
  else
@@ -0,0 +1,38 @@
1
# Mixin that adds forgiving, nil-safe lookups into nested Hash/Array data.
#
# Extend a Hash (for example a parsed JSON document) with this module to
# gain {#lookup}.
#
# @example
#   data = { "extras" => { "bbox-west-long" => "1.5" } }
#   data.extend(GuessableLookup)
#   data.lookup("extras", "bbox_west_long") # => "1.5"
#   data.lookup("missing", 0, "name")       # => nil
module GuessableLookup

  # Follows +path+ through the receiver one step at a time, guessing similar
  # Hash keys along the way.
  #
  # @param path [Array<String, Symbol, Integer>] keys and/or indexes to
  #   follow, in order
  # @return the value found at the end of the path (the receiver itself when
  #   +path+ is empty), or nil if any step cannot be resolved
  def lookup(*path)
    path.reduce(self) { |data, key| guess_key(data, key) }
  rescue StandardError
    nil
  end

  private

  # Guesses which key you want from a hash and returns the value of it.
  #
  # It returns the value of the original key if it exists in the hash,
  # otherwise tries to find a similar key, and if it fails it returns nil.
  # Similar keys are ones which use '_', '-' or '' as word separators & are
  # case-insensitive. Integer keys are never guessed; they are passed straight
  # to +data[key]+ so they can index into Arrays.
  #
  # @example
  #   guess_key({:a_key => true}, 'a_key')   # => true
  #   guess_key({:aKey => true}, 'a_key')    # => true
  #   guess_key({"a-KEY" => true}, 'a_key')  # => true
  #
  # @param data [Hash, Array] the container to read from
  # @param key [String, Symbol, Integer] the desired key
  # @return the value of the guessed key, or nil when nothing matches or
  #   +data+ cannot be indexed that way
  #
  def guess_key(data, key)
    # Integer rather than Fixnum: Fixnum no longer exists on current Rubies,
    # and the NameError it raised was swallowed by the rescue below, turning
    # every lookup into nil.
    return data[key] if key.is_a?(Integer) || data.key?(key)

    pattern = similar_key_pattern(key)
    # to_s lets Symbol (and any other non-String) keys take part in matching.
    data[data.keys.find { |candidate| candidate.to_s =~ pattern }]
  rescue StandardError
    # Best effort by design: un-indexable data (nil, String, ...) yields nil.
    nil
  end

  # Builds the case-insensitive, whole-string pattern that matches +key+ with
  # each '_' or '-' treated as an optional, interchangeable separator. Every
  # other character is escaped so regex metacharacters in +key+ match only
  # themselves.
  def similar_key_pattern(key)
    parts = key.to_s.split(/[_-]/, -1).map { |part| Regexp.escape(part) }
    /\A#{parts.join('[_-]?')}\z/i
  end

end
@@ -1,3 +1,3 @@
1
1
  module DataKitten
2
- VERSION = "1.2.0"
2
+ VERSION = "1.3.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kitten
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Smith
@@ -214,6 +214,7 @@ files:
214
214
  - lib/data_kitten/rights.rb
215
215
  - lib/data_kitten/source.rb
216
216
  - lib/data_kitten/temporal.rb
217
+ - lib/data_kitten/utils/guessable_lookup.rb
217
218
  - lib/data_kitten/version.rb
218
219
  homepage: http://github.com/data-kitten
219
220
  licenses: