data_kitten 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/data_kitten/dataset.rb +7 -1
- data/lib/data_kitten/fetcher.rb +4 -0
- data/lib/data_kitten/origins/json.rb +31 -0
- data/lib/data_kitten/origins.rb +3 -1
- data/lib/data_kitten/publishing_formats/ckan.rb +73 -33
- data/lib/data_kitten/utils/ckan3_hash.rb +8 -0
- data/lib/data_kitten/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NTg3MWUzZWM3MDJiMjM3YjRjM2NiNGNjMmEyNWQ2ZGI0ZDk4YTYyZg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MTk4YWMxMWRlODZiNmQ0ODQxOTMxMDJhZmZlODFlZDY1MjZlYWZkNQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZWM1NTRhZWUwMzg0ZGM4YzhkYmZkNWVjZDI0ZDQ4MDRlMzYzYWFkMWY5NjRi
|
10
|
+
ODYwOGExM2U3ZmZmZTdlYTQ4ZDYyYWFmMWUxOTI2NDUwMWI1NDRmZThkMWJm
|
11
|
+
ZWRjNzI5YWQxZDExMmM2Yjg2M2NhNmVlNTMzMDUwMTk3MTAxNTk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZmJhMzc0MjU3NGExODUwYTczZjVlNWNkMmI5NjgyYjZhZWVhNDA5YjdlZDFi
|
14
|
+
YTBmNjg2OGJmNzdlMGE1YTU4OTE4YzJhNzc5Mzg3OGI3MzlmYWRkZWIyY2Zl
|
15
|
+
OTM3NjY3YTNkMWY5YzU1MjRkMDJjZTIzN2MxM2Q3Y2VlYTc2OWM=
|
data/lib/data_kitten/dataset.rb
CHANGED
@@ -40,14 +40,16 @@ module DataKitten
|
|
40
40
|
# @param [Hash] options the details of the Dataset.
|
41
41
|
# @option options [String] :access_url A URL that can be used to access the Dataset.
|
42
42
|
#
|
43
|
-
def initialize(url_or_options)
|
43
|
+
def initialize(url_or_options, base_url=nil)
|
44
44
|
url = case url_or_options
|
45
45
|
when Hash
|
46
|
+
base_url ||= url_or_options[:base_url]
|
46
47
|
url_or_options[:access_url]
|
47
48
|
else
|
48
49
|
url_or_options
|
49
50
|
end
|
50
51
|
@access_url = DataKitten::Fetcher.wrap(url)
|
52
|
+
@base_uri = URI(base_url) if base_url
|
51
53
|
|
52
54
|
detect_origin
|
53
55
|
detect_host
|
@@ -58,6 +60,10 @@ module DataKitten
|
|
58
60
|
URI(@access_url.to_s)
|
59
61
|
end
|
60
62
|
|
63
|
+
def base_uri
|
64
|
+
@base_uri || uri.merge("/")
|
65
|
+
end
|
66
|
+
|
61
67
|
def url
|
62
68
|
@access_url.to_s
|
63
69
|
end
|
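data/lib/data_kitten/fetcher.rb
CHANGED
(hunk content for this file is not included in this listing)
data/lib/data_kitten/origins/json.rb
ADDED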
@@ -0,0 +1,31 @@
|
|
1
|
+
module DataKitten
|
2
|
+
|
3
|
+
module Origins
|
4
|
+
|
5
|
+
# JSON origin module. Automatically mixed into {Dataset} for datasets that are accessed through an API.
|
6
|
+
#
|
7
|
+
# @see Dataset
|
8
|
+
#
|
9
|
+
module JSON
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def self.supported?(resource)
|
14
|
+
resource.json?
|
15
|
+
end
|
16
|
+
|
17
|
+
public
|
18
|
+
|
19
|
+
# The origin type of the dataset.
|
20
|
+
# @return [Symbol] +:json+
|
21
|
+
# @see Dataset#origin
|
22
|
+
def origin
|
23
|
+
:json
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
data/lib/data_kitten/origins.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'data_kitten/origins/git'
|
2
2
|
require 'data_kitten/origins/web_service'
|
3
3
|
require 'data_kitten/origins/html'
|
4
|
+
require 'data_kitten/origins/json'
|
4
5
|
require 'data_kitten/origins/linked_data'
|
5
6
|
|
6
7
|
module DataKitten
|
@@ -11,8 +12,9 @@ module DataKitten
|
|
11
12
|
|
12
13
|
def detect_origin
|
13
14
|
[
|
14
|
-
DataKitten::Origins::Git,
|
15
15
|
DataKitten::Origins::HTML,
|
16
|
+
DataKitten::Origins::JSON,
|
17
|
+
DataKitten::Origins::Git,
|
16
18
|
DataKitten::Origins::WebService,
|
17
19
|
DataKitten::Origins::LinkedData,
|
18
20
|
].each do |origin|
|
data/lib/data_kitten/publishing_formats/ckan.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'data_kitten/utils/guessable_lookup.rb'
|
2
|
+
require 'data_kitten/utils/ckan3_hash.rb'
|
2
3
|
|
3
4
|
module DataKitten
|
4
5
|
|
@@ -10,29 +11,42 @@ module DataKitten
|
|
10
11
|
|
11
12
|
def self.supported?(instance)
|
12
13
|
uri = instance.uri
|
13
|
-
base_uri =
|
14
|
+
base_uri = instance.base_uri
|
14
15
|
*base, package = uri.path.split('/')
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
instance.identifier =
|
16
|
+
if uri.path =~ %r{api/\d+/action/package_show/?$}
|
17
|
+
result = JSON.parse(RestClient.get(uri.to_s))['result']
|
18
|
+
|
19
|
+
instance.identifier = result['id']
|
20
|
+
result['extras'] = CKAN3Hash.new(result['extras'], 'key', 'value')
|
21
|
+
result['tags'] = CKAN3Hash.new(result['tags'], 'name', 'display_name').values
|
22
|
+
instance.metadata = result
|
23
|
+
elsif uri.path =~ %r{api/\d+/rest/dataset/}
|
24
|
+
result = JSON.parse(RestClient.get(uri.to_s))
|
25
|
+
instance.identifier = result['id']
|
26
|
+
instance.metadata = result
|
25
27
|
else
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
# If the 2nd to last element in the path is 'dataset' then it's probably
|
29
|
+
# the CKAN dataset view page, the last element will be the dataset id
|
30
|
+
# or name
|
31
|
+
if base.last == "dataset"
|
32
|
+
instance.identifier = package
|
33
|
+
# build a base URI ending with a /
|
34
|
+
base_uri = uri.merge(base[0...-1].join('/') + '/')
|
35
|
+
# If the package is a UUID - it's more than likely to be a CKAN ID
|
36
|
+
elsif package.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/)
|
37
|
+
instance.identifier = package
|
38
|
+
else
|
39
|
+
results = begin
|
40
|
+
RestClient.get base_uri.merge("api/3/action/package_show").to_s, {:params => {:id => package}}
|
41
|
+
rescue RestClient::Exception
|
42
|
+
RestClient.get base_uri.merge("api/2/rest/dataset/#{package}").to_s
|
43
|
+
end
|
44
|
+
|
45
|
+
result = JSON.parse results
|
46
|
+
instance.identifier = result.fetch("result", result)["id"]
|
30
47
|
end
|
31
|
-
|
32
|
-
result = JSON.parse results
|
33
|
-
instance.identifier = result.fetch("result", result)["id"]
|
48
|
+
instance.metadata = JSON.parse RestClient.get base_uri.merge("api/rest/package/#{instance.identifier}").to_s
|
34
49
|
end
|
35
|
-
instance.metadata = JSON.parse RestClient.get base_uri.merge("api/rest/package/#{instance.identifier}").to_s
|
36
50
|
instance.metadata.extend(GuessableLookup)
|
37
51
|
instance.source = instance.metadata
|
38
52
|
return true
|
@@ -98,10 +112,15 @@ module DataKitten
|
|
98
112
|
#
|
99
113
|
# @see Dataset#publishers
|
100
114
|
def publishers
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
115
|
+
org = fetch_organization
|
116
|
+
result = if org
|
117
|
+
[org]
|
118
|
+
elsif group_id = metadata.lookup('groups', 0, 'id')
|
119
|
+
[fetch_publisher(group_id)]
|
120
|
+
else
|
121
|
+
[]
|
122
|
+
end
|
123
|
+
result.compact
|
105
124
|
end
|
106
125
|
|
107
126
|
def maintainers
|
@@ -251,15 +270,38 @@ module DataKitten
|
|
251
270
|
nil
|
252
271
|
end
|
253
272
|
|
273
|
+
def fetch_organization
|
274
|
+
if org = metadata['organization']
|
275
|
+
begin
|
276
|
+
uri = base_uri.merge("api/3/action/organization_show")
|
277
|
+
result = RestClient.get(uri.to_s, params: {id: org['id']})
|
278
|
+
org_data = JSON.parse(result)['result']
|
279
|
+
extras = CKAN3Hash.new(org_data['extras'], "key", "value")
|
280
|
+
rescue
|
281
|
+
uri = base_uri.merge("api/rest/group/#{org['id']}")
|
282
|
+
result = RestClient.get(uri.to_s)
|
283
|
+
org_data = JSON.parse(result)
|
284
|
+
extras = org_data['extras']
|
285
|
+
end
|
286
|
+
Agent.new(
|
287
|
+
:name => org_data['title'],
|
288
|
+
:mbox => (org_data['contact-email'] || extras['contact-email']),
|
289
|
+
:homepage => extras['website-url'] || base_uri.to_s
|
290
|
+
)
|
291
|
+
end
|
292
|
+
rescue
|
293
|
+
nil
|
294
|
+
end
|
295
|
+
|
254
296
|
def fetch_publisher(id)
|
255
297
|
uri = parsed_uri
|
256
298
|
[
|
257
|
-
"
|
258
|
-
"
|
259
|
-
"
|
299
|
+
"api/3/action/organization_show?id=#{id}",
|
300
|
+
"api/3/action/group_show?id=#{id}",
|
301
|
+
"api/rest/group/#{id}"
|
260
302
|
].each do |uri|
|
261
303
|
begin
|
262
|
-
@group = JSON.parse RestClient.get uri
|
304
|
+
@group = JSON.parse RestClient.get base_uri.merge(uri).to_s
|
263
305
|
break
|
264
306
|
rescue
|
265
307
|
# FakeWeb raises FakeWeb::NetConnectNotAllowedError, whereas
|
@@ -268,13 +310,11 @@ module DataKitten
|
|
268
310
|
end
|
269
311
|
end
|
270
312
|
|
271
|
-
|
272
|
-
Agent.new(
|
273
|
-
:name => @group["display_name"] || @group["result"]["title"],
|
313
|
+
if @group
|
314
|
+
Agent.new(:name => @group["display_name"] || @group["result"]["title"],
|
274
315
|
:homepage => select_extras(@group, "website-url"),
|
275
|
-
:mbox => select_extras(@group, "contact-email")
|
276
|
-
|
277
|
-
]
|
316
|
+
:mbox => select_extras(@group, "contact-email"))
|
317
|
+
end
|
278
318
|
end
|
279
319
|
|
280
320
|
def parsed_uri
|
data/lib/data_kitten/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kitten
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.1
|
4
|
+
version: 1.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Smith
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2016-04-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -190,6 +190,7 @@ files:
|
|
190
190
|
- lib/data_kitten/origins.rb
|
191
191
|
- lib/data_kitten/origins/git.rb
|
192
192
|
- lib/data_kitten/origins/html.rb
|
193
|
+
- lib/data_kitten/origins/json.rb
|
193
194
|
- lib/data_kitten/origins/linked_data.rb
|
194
195
|
- lib/data_kitten/origins/web_service.rb
|
195
196
|
- lib/data_kitten/publishing_formats.rb
|
@@ -200,6 +201,7 @@ files:
|
|
200
201
|
- lib/data_kitten/rights.rb
|
201
202
|
- lib/data_kitten/source.rb
|
202
203
|
- lib/data_kitten/temporal.rb
|
204
|
+
- lib/data_kitten/utils/ckan3_hash.rb
|
203
205
|
- lib/data_kitten/utils/guessable_lookup.rb
|
204
206
|
- lib/data_kitten/version.rb
|
205
207
|
homepage: http://github.com/data-kitten
|