data_kitten 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/data_kitten/dataset.rb +10 -4
- data/lib/data_kitten/fetcher.rb +70 -0
- data/lib/data_kitten/hosts.rb +3 -2
- data/lib/data_kitten/origins/git.rb +3 -3
- data/lib/data_kitten/origins/html.rb +3 -5
- data/lib/data_kitten/origins/linked_data.rb +5 -10
- data/lib/data_kitten/publishing_formats/ckan.rb +4 -3
- data/lib/data_kitten/publishing_formats/datapackage.rb +3 -3
- data/lib/data_kitten/publishing_formats/linked_data.rb +6 -15
- data/lib/data_kitten/version.rb +1 -1
- data/lib/data_kitten.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
Mjc1ZmZmMmFkOTcxZGYwMzI3ZTRiMzJlODU4ZmVlZDAxNTE0MzUwMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDU3Yzc5YTU4OGQwY2Y5OWI2YThkODUzYzUyOGZlNWY5MTZmNDEyNA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MDM4ZmFmMDkxZmU4ZDQ5ZWY4YzdkODg5YzJhNjNjZjMzNTAxM2ExYmIyOTA4
|
10
|
+
NTYyYmI0ZmJlNWFlYTI2OTk2MmE2YmY4MTg2MTkzOTliMGFiYjk3N2ZkYTE3
|
11
|
+
ODIwZTM1MmI2OGU2ODcwODlhN2U1MmQxOTRkY2Q2YzZkNTNmZWU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
M2MyYTYwZGE0ZTBkYzNhYWYwM2I5NjcxYjI1ODA4ZDlhYTgxZDFhZGYwNzAx
|
14
|
+
NTU0MDQ5MjM2ZDViYzNkNmQzODk5NzEzYjZiMzNlZmEzN2YwZTM5MGM4NzYx
|
15
|
+
OGQ2NTExNTc4ZWFmZWI2OWFlN2EyNzRiOWMwN2YzMTkzMDZlMTg=
|
data/lib/data_kitten/dataset.rb
CHANGED
@@ -28,8 +28,6 @@ module DataKitten
|
|
28
28
|
# @!attribute access_url
|
29
29
|
# @return [String] the URL that gives access to the dataset
|
30
30
|
attr_accessor :access_url
|
31
|
-
alias_method :uri, :access_url
|
32
|
-
alias_method :url, :access_url
|
33
31
|
|
34
32
|
# Create a new Dataset object
|
35
33
|
#
|
@@ -38,11 +36,19 @@ module DataKitten
|
|
38
36
|
# The class will attempt to auto-load metadata from this URL.
|
39
37
|
#
|
40
38
|
def initialize(options)
|
41
|
-
@access_url = options[:access_url]
|
39
|
+
@access_url = DataKitten::Fetcher.wrap(options[:access_url])
|
42
40
|
detect_origin
|
43
41
|
detect_host
|
44
42
|
detect_publishing_format
|
45
43
|
end
|
44
|
+
|
45
|
+
def uri
|
46
|
+
URI(@access_url.to_s)
|
47
|
+
end
|
48
|
+
|
49
|
+
def url
|
50
|
+
@access_url.to_s
|
51
|
+
end
|
46
52
|
|
47
53
|
# Can metadata be loaded for this Dataset?
|
48
54
|
#
|
@@ -224,4 +230,4 @@ module DataKitten
|
|
224
230
|
end
|
225
231
|
|
226
232
|
end
|
227
|
-
end
|
233
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module DataKitten
|
2
|
+
|
3
|
+
class Fetcher
|
4
|
+
|
5
|
+
attr_reader :url
|
6
|
+
|
7
|
+
def self.wrap(url_or_fetcher)
|
8
|
+
if url_or_fetcher.is_a?(self)
|
9
|
+
url_or_fetcher
|
10
|
+
else
|
11
|
+
new(url_or_fetcher)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(url)
|
16
|
+
@url = url
|
17
|
+
end
|
18
|
+
|
19
|
+
def ok?
|
20
|
+
code == 200
|
21
|
+
end
|
22
|
+
|
23
|
+
def code
|
24
|
+
response ? response.code : @code
|
25
|
+
end
|
26
|
+
|
27
|
+
def body
|
28
|
+
response if response
|
29
|
+
end
|
30
|
+
|
31
|
+
def as_json
|
32
|
+
JSON.parse(body) if response
|
33
|
+
rescue JSON::ParserError
|
34
|
+
nil
|
35
|
+
end
|
36
|
+
|
37
|
+
def content_type
|
38
|
+
response.headers[:content_type] if response
|
39
|
+
end
|
40
|
+
|
41
|
+
def content_type_format
|
42
|
+
if val = content_type
|
43
|
+
val.split(';').first
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def to_s
|
48
|
+
url.to_s
|
49
|
+
end
|
50
|
+
|
51
|
+
def html?
|
52
|
+
!!(content_type_format =~ %r{^text/html}i)
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
def response
|
57
|
+
unless @requested
|
58
|
+
@requested = true
|
59
|
+
begin
|
60
|
+
@response = RestClient.get(url)
|
61
|
+
rescue RestClient::ExceptionWithResponse => error
|
62
|
+
@error = error.response
|
63
|
+
@code = @error.code
|
64
|
+
end
|
65
|
+
end
|
66
|
+
@response
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
data/lib/data_kitten/hosts.rb
CHANGED
@@ -10,8 +10,8 @@ module DataKitten
|
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
-
def self.supported?(
|
14
|
-
|
13
|
+
def self.supported?(resource)
|
14
|
+
resource.to_s =~ /\A(git|https?):\/\/.*\.git\Z/
|
15
15
|
end
|
16
16
|
|
17
17
|
public
|
@@ -63,4 +63,4 @@ module DataKitten
|
|
63
63
|
|
64
64
|
end
|
65
65
|
|
66
|
-
end
|
66
|
+
end
|
@@ -10,10 +10,8 @@ module DataKitten
|
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
-
def self.supported?(
|
14
|
-
|
15
|
-
rescue
|
16
|
-
false
|
13
|
+
def self.supported?(resource)
|
14
|
+
resource.html?
|
17
15
|
end
|
18
16
|
|
19
17
|
public
|
@@ -29,4 +27,4 @@ module DataKitten
|
|
29
27
|
|
30
28
|
end
|
31
29
|
|
32
|
-
end
|
30
|
+
end
|
@@ -10,15 +10,10 @@ module DataKitten
|
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
-
def self.supported?(
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
return RDF::Format.content_types.keys.include?(
|
18
|
-
content_type.split(";").first )
|
19
|
-
|
20
|
-
rescue
|
21
|
-
false
|
13
|
+
def self.supported?(resource)
|
14
|
+
if type = resource.content_type_format
|
15
|
+
RDF::Format.content_types.keys.include?(type)
|
16
|
+
end
|
22
17
|
end
|
23
18
|
|
24
19
|
public
|
@@ -34,4 +29,4 @@ module DataKitten
|
|
34
29
|
|
35
30
|
end
|
36
31
|
|
37
|
-
end
|
32
|
+
end
|
@@ -9,7 +9,7 @@ module DataKitten
|
|
9
9
|
private
|
10
10
|
|
11
11
|
def self.supported?(instance)
|
12
|
-
uri =
|
12
|
+
uri = instance.uri
|
13
13
|
package = uri.path.split("/").last
|
14
14
|
# If the package is a UUID - it's more than likely to be a CKAN ID
|
15
15
|
if package.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/)
|
@@ -80,8 +80,9 @@ module DataKitten
|
|
80
80
|
#
|
81
81
|
# @see Dataset#licenses
|
82
82
|
def licenses
|
83
|
-
|
84
|
-
|
83
|
+
extras = metadata["extras"] || {}
|
84
|
+
uri = metadata["license_url"] || extras["licence_url"]
|
85
|
+
name = metadata["license_title"] || extras["licence_url_title"]
|
85
86
|
[
|
86
87
|
License.new(:id => metadata["license_id"],
|
87
88
|
:uri => uri,
|
@@ -17,7 +17,7 @@ module DataKitten
|
|
17
17
|
datapackage = DataPackage::Package.new( JSON.parse( metadata ) )
|
18
18
|
return datapackage.datapackage_version != nil
|
19
19
|
else
|
20
|
-
datapackage = DataPackage::Package.new( instance.
|
20
|
+
datapackage = DataPackage::Package.new( instance.url )
|
21
21
|
return datapackage.datapackage_version != nil
|
22
22
|
end
|
23
23
|
rescue => e
|
@@ -157,7 +157,7 @@ module DataKitten
|
|
157
157
|
metadata = load_file("datapackage.json")
|
158
158
|
@datapackage = DataPackage::Package.new( JSON.parse( metadata ) )
|
159
159
|
else
|
160
|
-
@datapackage = DataPackage::Package.new(
|
160
|
+
@datapackage = DataPackage::Package.new( url )
|
161
161
|
end
|
162
162
|
end
|
163
163
|
@datapackage
|
@@ -166,4 +166,4 @@ module DataKitten
|
|
166
166
|
|
167
167
|
end
|
168
168
|
|
169
|
-
end
|
169
|
+
end
|
@@ -26,7 +26,7 @@ module DataKitten
|
|
26
26
|
#Supports content negotiation for various RDF serializations. Attempts "dataset autodiscovery" if it receives
|
27
27
|
#an HTML response. This leaves the RDFa Publishing Format to just parse RDFa responses
|
28
28
|
def self.create_graph(uri)
|
29
|
-
|
29
|
+
|
30
30
|
resp = RestClient.get uri,
|
31
31
|
:accept=>ACCEPT_HEADER
|
32
32
|
return false if resp.code != 200
|
@@ -55,15 +55,13 @@ module DataKitten
|
|
55
55
|
graph << reader.new( StringIO.new( resp.body ))
|
56
56
|
|
57
57
|
return graph
|
58
|
-
rescue
|
59
|
-
#puts e
|
60
|
-
#puts e.backtrace
|
58
|
+
rescue
|
61
59
|
nil
|
62
60
|
end
|
63
61
|
|
64
62
|
#Can we create an RDF graph for this object containing the description of a dataset?
|
65
63
|
def self.supported?(instance)
|
66
|
-
graph = create_graph(instance.
|
64
|
+
graph = create_graph(instance.url)
|
67
65
|
return false unless graph
|
68
66
|
return true if first_of_type(graph,
|
69
67
|
[RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset,
|
@@ -80,23 +78,16 @@ module DataKitten
|
|
80
78
|
:rdf
|
81
79
|
end
|
82
80
|
|
83
|
-
def uri
|
84
|
-
access_url
|
85
|
-
end
|
86
|
-
|
87
81
|
private
|
88
82
|
|
89
83
|
def dataset_uri
|
90
|
-
|
84
|
+
url
|
91
85
|
end
|
92
86
|
|
93
87
|
def graph
|
94
|
-
|
95
|
-
@graph = LinkedData.create_graph(access_url)
|
96
|
-
end
|
97
|
-
@graph
|
88
|
+
@graph ||= LinkedData.create_graph(dataset_uri)
|
98
89
|
end
|
99
90
|
|
100
91
|
end
|
101
92
|
end
|
102
|
-
end
|
93
|
+
end
|
data/lib/data_kitten/version.rb
CHANGED
data/lib/data_kitten.rb
CHANGED
@@ -19,6 +19,7 @@ require 'data_kitten/temporal'
|
|
19
19
|
require 'data_kitten/dataset'
|
20
20
|
require 'data_kitten/distribution_format'
|
21
21
|
require 'data_kitten/distribution'
|
22
|
+
require 'data_kitten/fetcher'
|
22
23
|
|
23
24
|
# A collection of classes that represent Datasets and other concepts, modeled on {http://www.w3.org/TR/vocab-dcat/ DCAT}.
|
24
25
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kitten
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Smith
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2015-04-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -195,6 +195,7 @@ files:
|
|
195
195
|
- lib/data_kitten/dataset.rb
|
196
196
|
- lib/data_kitten/distribution.rb
|
197
197
|
- lib/data_kitten/distribution_format.rb
|
198
|
+
- lib/data_kitten/fetcher.rb
|
198
199
|
- lib/data_kitten/hosts.rb
|
199
200
|
- lib/data_kitten/hosts/bitbucket.rb
|
200
201
|
- lib/data_kitten/hosts/gist.rb
|
@@ -234,7 +235,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
234
235
|
version: '0'
|
235
236
|
requirements: []
|
236
237
|
rubyforge_project:
|
237
|
-
rubygems_version: 2.
|
238
|
+
rubygems_version: 2.4.5
|
238
239
|
signing_key:
|
239
240
|
specification_version: 4
|
240
241
|
summary: Get dataset metadata in a consistent format - no matter what you throw at
|