data_kitten 1.2.0 → 1.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NDc5ZTUwMDA5OGJjMjg3YzI4MThmZWYzNTQ1NTkyNTNlN2JhZWU5Nw==
4
+ MTU4NmQ1MmU2YjJhY2U2NmVjOWE5NTBhOGM2YjNhNGQzMWIxYTU4MQ==
5
5
  data.tar.gz: !binary |-
6
- ZTkzNGQxYWIwOWU5NjliZThmZjE0Y2FkMWE1NmFhYTkwMTY2MGJhOQ==
6
+ ZjNhNWU4NzNlZjI4ODU3ZTRkYTgxZmY4MWM0NTA3OTQyNDNmMmJjMg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MGM1NjQ2ZDUzYzJiYjY5NzY1OTk1YzI0ZDZhZDQ3ZGIwZjdhNDVmYzRjNzM4
10
- ODc2MTUyYzFiOGNmNmNjNzExZjFmMTllZmFiNDMyZThiY2VmYjEyYThkZWMz
11
- ZmU2NjUyMDQ2MjVkYTNlYmNhNDBhOGYzYzE2Nzk3NjFmYTAyMTY=
9
+ ZjY0ODMyZmZjODMxNmU4NmNhMmI4N2YxMWMzMThhNGFlMTAxZmQ0ODA1OTcz
10
+ MjcwOWIwMmYyMmRiNDlmNTEyN2ExMWE4N2E4NWJlMDFlNjI3NDU4ZmZjZWFj
11
+ Yjk2MWZiZGJmODE5ZDVjMWQ0OTY2ODI4MWRiMzVlZmE5NjM0OGM=
12
12
  data.tar.gz: !binary |-
13
- ZWJhOTU2YTNkMmY3MzhiZjllNTY4YmY1MDFjNGZlZmY1MjIwODQ4ZDA5OGIz
14
- N2YwMjk4YzM5ZWUyM2VhODJjZjgzM2QzY2JjYjA5ZDFhMTUzM2U0MGU4ZDYz
15
- NjUxN2ExYTg0MTJlZTJkN2U2MTc1ZDZjMjBjOWQ5NzU3MWI1MGY=
13
+ MjY5NDZiZjliNThjZTk5NTQ4YzZlM2M4OGFiOGYzYjFjZDhmYzQ4NmRlMjJm
14
+ ZmE4MTMxZTMyZmQzNjBhODEyODZhODA3ZjIyMDUzNDFmMWJiMWRhNTBlMTU3
15
+ MTM2MTU4ZGY4YmZlMjcyM2VkYmM5Y2Q3NzY3YWJmNmNlODdlZTI=
@@ -49,7 +49,11 @@ module DataKitten
49
49
  def url
50
50
  @access_url.to_s
51
51
  end
52
-
52
+
53
+ def source
54
+ @access_url.as_json if @access_url.ok?
55
+ end
56
+
53
57
  # Can metadata be loaded for this Dataset?
54
58
  #
55
59
  # @return [Boolean] true if metadata can be loaded, false if it's
@@ -75,7 +79,15 @@ module DataKitten
75
79
  def host
76
80
  nil
77
81
  end
78
-
82
+
83
+ # A unique identifier of the dataset.
84
+ #
85
+ # @return [String] the identifier of the dataset
86
+ #
87
+ def identifier
88
+ nil
89
+ end
90
+
79
91
  # The human-readable title of the dataset.
80
92
  #
81
93
  # @return [String] the title of the dataset.
@@ -126,6 +138,13 @@ module DataKitten
126
138
  def modified
127
139
  nil
128
140
  end
141
+
142
+ # A web page that can be used to gain access to the dataset, its distributions and/or additional information.
143
+ #
144
+ # @return [String] The URL to the dataset
145
+ def landing_page
146
+ nil
147
+ end
129
148
 
130
149
  # The temporal coverage of the dataset
131
150
  #
@@ -191,7 +210,21 @@ module DataKitten
191
210
  def contributors
192
211
  []
193
212
  end
194
-
213
+
214
+ # The language of the dataset.
215
+ #
216
+ # @return [String] the language of the dataset
217
+ def language
218
+ nil
219
+ end
220
+
221
+ # The main category the dataset belongs to.
222
+ #
223
+ # @return [String]
224
+ def theme
225
+ nil
226
+ end
227
+
195
228
  # Has the data been crowdsourced?
196
229
  #
197
230
  # @return [Boolean] Whether the data has been crowdsourced or not.
@@ -229,5 +262,12 @@ module DataKitten
229
262
  []
230
263
  end
231
264
 
265
+ # Spatial coverage of the dataset
266
+ #
267
+ # @return [GeoJSON Geometry] A GeoJSON geometry object of the spatial coverage
268
+ def spatial
269
+ nil
270
+ end
271
+
232
272
  end
233
273
  end
@@ -14,8 +14,11 @@ module DataKitten
14
14
  # @!attribute access_url
15
15
  # @return [String] a URL to access the distribution.
16
16
  attr_accessor :access_url
17
- alias_method :uri, :access_url
18
- alias_method :download_url, :access_url
17
+
18
+ # @!attribute download_url
19
+ # @return [String] a URL to the file of the distribution.
20
+ attr_accessor :download_url
21
+ alias_method :uri, :download_url
19
22
 
20
23
  # @!attribute path
21
24
  # @return [String] the path of the distribution within the source, if appropriate
@@ -29,6 +32,22 @@ module DataKitten
29
32
  # @return [String] a textual description
30
33
  attr_accessor :description
31
34
 
35
+ # @!attribute issued
36
+ # @return [Date] date created
37
+ attr_accessor :issued
38
+
39
+ # @!attribute modified
40
+ # @return [Date] date modified
41
+ attr_accessor :modified
42
+
43
+ # @!attribute byte_size
44
+ # @return [Integer] size of file in bytes
45
+ attr_accessor :byte_size
46
+
47
+ # @!attribute media_type
48
+ # @return [String] the IANA media type (MIME type) of the distribution
49
+ attr_accessor :media_type
50
+
32
51
  # @!attribute schema
33
52
  # @return [Hash] a hash representing the schema of the data within the distribution. Will
34
53
  # change to a more structured object later.
@@ -65,7 +84,7 @@ module DataKitten
65
84
  @schema = r['schema']
66
85
  # Get path
67
86
  @path = r['path']
68
- @access_url = r['url']
87
+ @download_url = r['url']
69
88
  # Set title
70
89
  @title = @path || @uri
71
90
  elsif r = options[:dcat_resource]
@@ -73,13 +92,18 @@ module DataKitten
73
92
  @description = r[:title]
74
93
  @access_url = r[:accessURL]
75
94
  elsif r = options[:ckan_resource]
76
- @title = r[:title]
77
- @description = r[:title]
78
- @access_url = r[:accessURL]
79
- @extension = r[:format]
95
+ @title = r[:title]
96
+ @description = r[:title]
97
+ @issued = r[:issued]
98
+ @modified = r[:modified]
99
+ @access_url = r[:accessURL]
100
+ @download_url = r[:downloadURL]
101
+ @byte_size = r[:byteSize]
102
+ @media_type = r[:mediaType]
103
+ @extension = r[:format]
80
104
  # Load HTTP Response for further use
81
105
  @format = r[:format] ? DistributionFormat.new(self) : nil
82
- end
106
+ end
83
107
  # Set default CSV dialect
84
108
  @dialect ||= {
85
109
  "delimiter" => ","
@@ -112,7 +136,7 @@ module DataKitten
112
136
  #
113
137
  # @return [Boolean] whether the HTTP response returns a success code or not
114
138
  def exists?
115
- if @access_url
139
+ if @download_url
116
140
  http_head.response_code != 404
117
141
  end
118
142
  end
@@ -124,8 +148,8 @@ module DataKitten
124
148
  @data ||= begin
125
149
  if @path
126
150
  datafile = @dataset.send(:load_file, @path)
127
- elsif @access_url
128
- datafile = RestClient.get @access_url rescue nil
151
+ elsif @download_url
152
+ datafile = RestClient.get @download_url rescue nil
129
153
  end
130
154
  if datafile
131
155
  case format.extension
@@ -147,9 +171,9 @@ module DataKitten
147
171
  end
148
172
 
149
173
  def http_head
150
- if @access_url
174
+ if @download_url
151
175
  @http_head ||= begin
152
- Curl::Easy.http_head(@access_url) do |c|
176
+ Curl::Easy.http_head(@download_url) do |c|
153
177
  c.follow_location = true
154
178
  c.useragent = "curb"
155
179
  end
@@ -21,6 +21,7 @@ module DataKitten
21
21
  @@formats ||= {
22
22
  csv: { structured: true, open: true },
23
23
  xls: { structured: true, open: false },
24
+ xlsx: { structured: true, open: true },
24
25
  rdf: { structured: true, open: true },
25
26
  xml: { structured: true, open: true },
26
27
  wms: { structured: true, open: true },
@@ -33,6 +34,7 @@ module DataKitten
33
34
  sparql: { structured: true, open: true },
34
35
  kml: { structured: true, open: true },
35
36
  georss: { structured: true, open: true },
37
+ geojson: { structured: true, open: true },
36
38
  shp: { structured: true, open: true },
37
39
  html: { structured: false, open: true },
38
40
  doc: { structured: false, open: false },
@@ -3,6 +3,23 @@ module DataKitten
3
3
  # A license for a {Dataset} or {Distribution}
4
4
  #
5
5
  class License
6
+
7
+ LICENSES = {
8
+ /opendatacommons.org.*\/by(\/|$)/ => "odc-by",
9
+ /opendatacommons.org.*\/odbl(\/|$)/ => "odc-odbl",
10
+ /opendatacommons.org.*\/pddl(\/|$)/ => "odc-pddl",
11
+ /opendefinition.org.*\/odc-by(\/|$)/ => "odc-by",
12
+ /opendefinition.org.*\/odc-pddl(\/|$)/ => "odc-pddl",
13
+ /opendefinition.org.*\/cc-zero(\/|$)/ => "cc-zero",
14
+ /opendefinition.org.*\/cc-by(\/|$)/ => "cc-by",
15
+ /opendefinition.org.*\/cc-by-sa(\/|$)/ => "cc-by-sa",
16
+ /opendefinition.org.*\/gfdl(\/|$)/ => "gfdl",
17
+ /creativecommons.org.*\/zero(\/|$)/ => "cc-zero",
18
+ /creativecommons.org.*\/by-sa(\/|$)/ => "cc-by-sa",
19
+ /creativecommons.org.*\/by(\/|$)/ => "cc-by",
20
+ /(data|nationalarchives).gov.uk.*\/open-government-licence(\/|$)/ => "ogl-uk",
21
+ /usa.gov\/publicdomain(\/|$)/ => "us-pd"
22
+ }
6
23
 
7
24
  # @!attribute is
8
25
  # @return [String] a short ID that identifies the license.
@@ -19,6 +36,10 @@ module DataKitten
19
36
  # @!attribute type
20
37
  # @return [String] the type of information this license applies to. Could be +:data+ or +:content+.
21
38
  attr_accessor :type
39
+
40
+ # @!attribute abbr
41
+ # @return [String] the license abbreviation
42
+ attr_accessor :abbr
22
43
 
23
44
  # Create a new License object.
24
45
  #
@@ -32,8 +53,14 @@ module DataKitten
32
53
  @name = options[:name]
33
54
  @uri = options[:uri]
34
55
  @type = options[:type]
56
+ @abbr = get_license_abbr(@uri) if @uri
57
+ end
58
+
59
+ def get_license_abbr(uri)
60
+ license = LICENSES.find { |regex, abbr| uri =~ regex }
61
+ license.last if license
35
62
  end
36
63
 
37
64
  end
38
65
 
39
- end
66
+ end
@@ -1,3 +1,5 @@
1
+ require 'data_kitten/utils/guessable_lookup.rb'
2
+
1
3
  module DataKitten
2
4
 
3
5
  module PublishingFormats
@@ -26,6 +28,7 @@ module DataKitten
26
28
  @@id = result["result"]["id"] rescue result["id"]
27
29
  end
28
30
  @@metadata = JSON.parse RestClient.get "#{uri.scheme}://#{uri.host}/api/rest/package/#{@@id}"
31
+ @@metadata.extend(GuessableLookup)
29
32
  rescue
30
33
  false
31
34
  end
@@ -43,14 +46,32 @@ module DataKitten
43
46
  #
44
47
  # @see Dataset#data_title
45
48
  def data_title
46
- metadata["title"] rescue nil
49
+ metadata.lookup("title")
47
50
  end
48
51
 
49
52
  # A brief description of the dataset
50
53
  #
51
54
  # @see Dataset#description
52
55
  def description
53
- metadata["notes"] rescue nil
56
+ metadata.lookup("notes") || metadata.lookup("description")
57
+ rescue
58
+ nil
59
+ end
60
+
61
+ # An identifier for the dataset
62
+ #
63
+ # @see Dataset#identifier
64
+ def identifier
65
+ metadata.lookup("name") || @@id
66
+ end
67
+
68
+ # A web page which can be used to gain access to the dataset
69
+ #
70
+ # @see Dataset#landing_page
71
+ def landing_page
72
+ metadata.lookup("extras", "landing_page") ||
73
+ metadata.lookup("url") ||
74
+ metadata.lookup("ckan_url")
54
75
  end
55
76
 
56
77
  # Keywords for the dataset
@@ -58,7 +79,7 @@ module DataKitten
58
79
  # @see Dataset#keywords
59
80
  def keywords
60
81
  keywords = []
61
- metadata["tags"].each do |tag|
82
+ metadata.lookup("tags").each do |tag|
62
83
  keywords << tag
63
84
  end
64
85
  return keywords
@@ -70,7 +91,7 @@ module DataKitten
70
91
  #
71
92
  # @see Dataset#publishers
72
93
  def publishers
73
- id = metadata['organization']['id'] || metadata['groups'][0]
94
+ id = metadata.lookup('organization', 'id') || metadata.lookup('groups', 0)
74
95
  fetch_publisher(id)
75
96
  rescue
76
97
  []
@@ -88,10 +109,9 @@ module DataKitten
88
109
  #
89
110
  # @see Dataset#licenses
90
111
  def licenses
91
- extras = metadata["extras"] || {}
92
- id = metadata["license_id"]
93
- uri = metadata["license_url"] || extras["licence_url"]
94
- name = metadata["license_title"] || extras["licence_url_title"]
112
+ id = metadata.lookup("license_id")
113
+ uri = metadata.lookup("license_url") || metadata.lookup("extras", "licence_url")
114
+ name = metadata.lookup("license_title") || metadata.lookup("extras", "licence_url_title")
95
115
  if [id, uri, name].any?
96
116
  [License.new(:id => id, :uri => uri, :name => name)]
97
117
  else
@@ -104,12 +124,17 @@ module DataKitten
104
124
  # @see Dataset#distributions
105
125
  def distributions
106
126
  distributions = []
107
- metadata["resources"].each do |resource|
127
+ metadata.lookup("resources").each do |resource|
108
128
  distribution = {
109
129
  :title => resource["description"],
110
- :accessURL => resource["url"],
111
- :format => resource["format"]
130
+ :accessURL => landing_page,
131
+ :downloadURL => resource["url"],
132
+ :format => resource["format"],
133
+ :mediaType => resource["mimetype"] || resource["content_type"],
112
134
  }
135
+ distribution[:issued] = Date.parse(resource["created"]) rescue nil
136
+ distribution[:modified] = Date.parse(resource["last_modified"] || resource["revision_timestamp"]) rescue nil
137
+ distribution[:byteSize] = Integer(resource["size"]) rescue nil
113
138
  distributions << Distribution.new(self, ckan_resource: distribution)
114
139
  end
115
140
  return distributions
@@ -121,32 +146,68 @@ module DataKitten
121
146
  #
122
147
  # @see Dataset#update_frequency
123
148
  def update_frequency
124
- metadata["extras"]["update_frequency"] || metadata["extras"]["frequency-of-update"] rescue nil
149
+ metadata.lookup("extras", "update_frequency") ||
150
+ metadata.lookup("extras", "frequency-of-update") ||
151
+ metadata.lookup("extras", "accrual_periodicity")
152
+ rescue
153
+ nil
125
154
  end
126
155
 
127
156
  # Date the dataset was released
128
157
  #
129
158
  # @see Dataset#issued
130
159
  def issued
131
- Date.parse metadata["metadata_created"] rescue nil
160
+ Date.parse metadata.lookup("metadata_created") rescue nil
132
161
  end
133
162
 
134
163
  # Date the dataset was modified
135
164
  #
136
165
  # @see Dataset#modified
137
166
  def modified
138
- Date.parse metadata["metadata_modified"] rescue nil
167
+ Date.parse metadata.lookup("metadata_modified") rescue nil
139
168
  end
140
169
 
141
170
  # The temporal coverage of the dataset
142
171
  #
143
172
  # @see Dataset#temporal
144
173
  def temporal
145
- start_date = Date.parse metadata["extras"]["temporal_coverage-from"] rescue nil
146
- end_date = Date.parse metadata["extras"]["temporal_coverage-to"] rescue nil
174
+ from = metadata.lookup("extras", "temporal_coverage-from") ||
175
+ metadata.lookup("extras", "temporal-extent-begin")
176
+ to = metadata.lookup("extras", "temporal_coverage-to") ||
177
+ metadata.lookup("extras", "temporal-extent-end")
178
+ start_date = Date.parse from rescue nil
179
+ end_date = Date.parse to rescue nil
147
180
  Temporal.new(:start => start_date, :end => end_date)
148
181
  end
149
182
 
183
+ # The language of the dataset
184
+ #
185
+ # @see Dataset#language
186
+ def language
187
+ metadata.lookup("language") ||
188
+ metadata.lookup("metadata_language") ||
189
+ metadata.lookup("extras", "metadata_language") ||
190
+ metadata.lookup("extras", "language", 0) ||
191
+ metadata.lookup("extras", "language")
192
+ end
193
+
194
+ # The main category of the dataset
195
+ #
196
+ # @see Dataset#theme
197
+ def theme
198
+ metadata.lookup("extras", "theme", 0) ||
199
+ metadata.lookup("extras", "theme-primary") ||
200
+ metadata.lookup("groups", 0, "name") ||
201
+ metadata.lookup("groups", 0)
202
+ end
203
+
204
+ # Spatial coverage of the dataset
205
+ #
206
+ # @see Dataset#spatial
207
+ def spatial
208
+ extract_spatial || extract_bbox
209
+ end
210
+
150
211
  private
151
212
 
152
213
  def metadata
@@ -161,17 +222,45 @@ module DataKitten
161
222
  extra
162
223
  end
163
224
 
225
+ def extract_spatial
226
+ geometry = JSON.parse metadata.lookup("extras", "spatial")
227
+ return geometry if !geometry["type"].nil?
228
+ rescue
229
+ nil
230
+ end
231
+
232
+ def extract_bbox
233
+ west = Float(metadata.lookup("extras", "bbox-west-long"))
234
+ east = Float(metadata.lookup("extras", "bbox-east-long"))
235
+ north = Float(metadata.lookup("extras", "bbox-north-lat"))
236
+ south = Float(metadata.lookup("extras", "bbox-south-lat"))
237
+
238
+ { "type" => "Polygon", "coordinates" => [
239
+ [
240
+ [west, north],
241
+ [east, north],
242
+ [east, south],
243
+ [west, south],
244
+ [west, north]
245
+ ]
246
+ ] }
247
+ rescue
248
+ nil
249
+ end
250
+
164
251
  def fetch_publisher(id)
165
252
  uri = parsed_uri
166
253
  [
167
- "#{uri.scheme}://#{uri.host}/api/rest/group/#{id}",
254
+ "#{uri.scheme}://#{uri.host}/api/3/action/organization_show?id=#{id}",
168
255
  "#{uri.scheme}://#{uri.host}/api/3/action/group_show?id=#{id}",
169
- "#{uri.scheme}://#{uri.host}/api/3/action/organization_show?id=#{id}"
256
+ "#{uri.scheme}://#{uri.host}/api/rest/group/#{id}"
170
257
  ].each do |uri|
171
258
  begin
172
259
  @group = JSON.parse RestClient.get uri
173
260
  break
174
- rescue RestClient::ResourceNotFound
261
+ rescue
262
+ # FakeWeb raises FakeWeb::NetConnectNotAllowedError, whereas
263
+ # RestClient raises RestClient::ResourceNotFound in the "real world".
175
264
  nil
176
265
  end
177
266
  end
@@ -190,8 +279,8 @@ module DataKitten
190
279
  end
191
280
 
192
281
  def extract_agent(name_field, email_field)
193
- name = metadata[name_field]
194
- email = metadata[email_field]
282
+ name = metadata.lookup(name_field)
283
+ email = metadata.lookup(email_field)
195
284
  if [name, email].any?
196
285
  [Agent.new(name: name, mbox: email)]
197
286
  else
@@ -0,0 +1,38 @@
1
+ module GuessableLookup
2
+
3
+ def lookup(*path)
4
+ data = self
5
+ path.each { |key| data = guess_key(data, key) }
6
+ data
7
+ rescue
8
+ nil
9
+ end
10
+
11
+ private
12
+
13
+ # Guesses which key you want from a hash and returns the value of it.
14
+ #
15
+ # It returns the value of the original key if it exists in the hash, otherwise
16
+ # tries to find a similar key, and if it fails it returns nil.
17
+ # Similar keys are ones which use '_', '-' or '' as word separators & are
18
+ # case-insensitive.
19
+ #
20
+ # @example
21
+ # guess_key({:a_key => true}, 'a_key') # => true
22
+ # guess_key({:aKey => true}, 'a_key') # => true
23
+ # guess_key({"a-KEY" => true}, 'a_key') # => true
24
+ #
25
+ # @param data [Hash]
26
+ # @param key [String] The desired key
27
+ # @return The value of the guessed key
28
+ #
29
+ def guess_key(data, key)
30
+ return data[key] if key.is_a?(Fixnum) || data.keys.include?(key)
31
+ likeKey = key.gsub(/[\_\-]/, "[\_\-]?")
32
+ key = data.keys.select { |k| k =~ /^#{likeKey}$/i }.first
33
+ data[key]
34
+ rescue
35
+ nil
36
+ end
37
+
38
+ end
@@ -1,3 +1,3 @@
1
1
  module DataKitten
2
- VERSION = "1.2.0"
2
+ VERSION = "1.3.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kitten
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Smith
@@ -214,6 +214,7 @@ files:
214
214
  - lib/data_kitten/rights.rb
215
215
  - lib/data_kitten/source.rb
216
216
  - lib/data_kitten/temporal.rb
217
+ - lib/data_kitten/utils/guessable_lookup.rb
217
218
  - lib/data_kitten/version.rb
218
219
  homepage: http://github.com/data-kitten
219
220
  licenses: