data_kitten 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/data_kitten/dataset.rb +10 -4
- data/lib/data_kitten/fetcher.rb +70 -0
- data/lib/data_kitten/hosts.rb +3 -2
- data/lib/data_kitten/origins/git.rb +3 -3
- data/lib/data_kitten/origins/html.rb +3 -5
- data/lib/data_kitten/origins/linked_data.rb +5 -10
- data/lib/data_kitten/publishing_formats/ckan.rb +4 -3
- data/lib/data_kitten/publishing_formats/datapackage.rb +3 -3
- data/lib/data_kitten/publishing_formats/linked_data.rb +6 -15
- data/lib/data_kitten/version.rb +1 -1
- data/lib/data_kitten.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
Mjc1ZmZmMmFkOTcxZGYwMzI3ZTRiMzJlODU4ZmVlZDAxNTE0MzUwMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDU3Yzc5YTU4OGQwY2Y5OWI2YThkODUzYzUyOGZlNWY5MTZmNDEyNA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MDM4ZmFmMDkxZmU4ZDQ5ZWY4YzdkODg5YzJhNjNjZjMzNTAxM2ExYmIyOTA4
|
10
|
+
NTYyYmI0ZmJlNWFlYTI2OTk2MmE2YmY4MTg2MTkzOTliMGFiYjk3N2ZkYTE3
|
11
|
+
ODIwZTM1MmI2OGU2ODcwODlhN2U1MmQxOTRkY2Q2YzZkNTNmZWU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
M2MyYTYwZGE0ZTBkYzNhYWYwM2I5NjcxYjI1ODA4ZDlhYTgxZDFhZGYwNzAx
|
14
|
+
NTU0MDQ5MjM2ZDViYzNkNmQzODk5NzEzYjZiMzNlZmEzN2YwZTM5MGM4NzYx
|
15
|
+
OGQ2NTExNTc4ZWFmZWI2OWFlN2EyNzRiOWMwN2YzMTkzMDZlMTg=
|
data/lib/data_kitten/dataset.rb
CHANGED
@@ -28,8 +28,6 @@ module DataKitten
|
|
28
28
|
# @!attribute access_url
|
29
29
|
# @return [String] the URL that gives access to the dataset
|
30
30
|
attr_accessor :access_url
|
31
|
-
alias_method :uri, :access_url
|
32
|
-
alias_method :url, :access_url
|
33
31
|
|
34
32
|
# Create a new Dataset object
|
35
33
|
#
|
@@ -38,11 +36,19 @@ module DataKitten
|
|
38
36
|
# The class will attempt to auto-load metadata from this URL.
|
39
37
|
#
|
40
38
|
def initialize(options)
|
41
|
-
@access_url = options[:access_url]
|
39
|
+
@access_url = DataKitten::Fetcher.wrap(options[:access_url])
|
42
40
|
detect_origin
|
43
41
|
detect_host
|
44
42
|
detect_publishing_format
|
45
43
|
end
|
44
|
+
|
45
|
+
def uri
|
46
|
+
URI(@access_url.to_s)
|
47
|
+
end
|
48
|
+
|
49
|
+
def url
|
50
|
+
@access_url.to_s
|
51
|
+
end
|
46
52
|
|
47
53
|
# Can metadata be loaded for this Dataset?
|
48
54
|
#
|
@@ -224,4 +230,4 @@ module DataKitten
|
|
224
230
|
end
|
225
231
|
|
226
232
|
end
|
227
|
-
end
|
233
|
+
end
|
data/lib/data_kitten/fetcher.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module DataKitten
|
2
|
+
|
3
|
+
class Fetcher
|
4
|
+
|
5
|
+
attr_reader :url
|
6
|
+
|
7
|
+
def self.wrap(url_or_fetcher)
|
8
|
+
if url_or_fetcher.is_a?(self)
|
9
|
+
url_or_fetcher
|
10
|
+
else
|
11
|
+
new(url_or_fetcher)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(url)
|
16
|
+
@url = url
|
17
|
+
end
|
18
|
+
|
19
|
+
def ok?
|
20
|
+
code == 200
|
21
|
+
end
|
22
|
+
|
23
|
+
def code
|
24
|
+
response ? response.code : @code
|
25
|
+
end
|
26
|
+
|
27
|
+
def body
|
28
|
+
response if response
|
29
|
+
end
|
30
|
+
|
31
|
+
def as_json
|
32
|
+
JSON.parse(body) if response
|
33
|
+
rescue JSON::ParserError
|
34
|
+
nil
|
35
|
+
end
|
36
|
+
|
37
|
+
def content_type
|
38
|
+
response.headers[:content_type] if response
|
39
|
+
end
|
40
|
+
|
41
|
+
def content_type_format
|
42
|
+
if val = content_type
|
43
|
+
val.split(';').first
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def to_s
|
48
|
+
url.to_s
|
49
|
+
end
|
50
|
+
|
51
|
+
def html?
|
52
|
+
!!(content_type_format =~ %r{^text/html}i)
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
def response
|
57
|
+
unless @requested
|
58
|
+
@requested = true
|
59
|
+
begin
|
60
|
+
@response = RestClient.get(url)
|
61
|
+
rescue RestClient::ExceptionWithResponse => error
|
62
|
+
@error = error.response
|
63
|
+
@code = @error.code
|
64
|
+
end
|
65
|
+
end
|
66
|
+
@response
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
data/lib/data_kitten/hosts.rb
CHANGED
@@ -10,8 +10,8 @@ module DataKitten
|
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
-
def self.supported?(
|
14
|
-
|
13
|
+
def self.supported?(resource)
|
14
|
+
resource.to_s =~ /\A(git|https?):\/\/.*\.git\Z/
|
15
15
|
end
|
16
16
|
|
17
17
|
public
|
@@ -63,4 +63,4 @@ module DataKitten
|
|
63
63
|
|
64
64
|
end
|
65
65
|
|
66
|
-
end
|
66
|
+
end
|
@@ -10,10 +10,8 @@ module DataKitten
|
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
-
def self.supported?(
|
14
|
-
|
15
|
-
rescue
|
16
|
-
false
|
13
|
+
def self.supported?(resource)
|
14
|
+
resource.html?
|
17
15
|
end
|
18
16
|
|
19
17
|
public
|
@@ -29,4 +27,4 @@ module DataKitten
|
|
29
27
|
|
30
28
|
end
|
31
29
|
|
32
|
-
end
|
30
|
+
end
|
@@ -10,15 +10,10 @@ module DataKitten
|
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
-
def self.supported?(
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
return RDF::Format.content_types.keys.include?(
|
18
|
-
content_type.split(";").first )
|
19
|
-
|
20
|
-
rescue
|
21
|
-
false
|
13
|
+
def self.supported?(resource)
|
14
|
+
if type = resource.content_type_format
|
15
|
+
RDF::Format.content_types.keys.include?(type)
|
16
|
+
end
|
22
17
|
end
|
23
18
|
|
24
19
|
public
|
@@ -34,4 +29,4 @@ module DataKitten
|
|
34
29
|
|
35
30
|
end
|
36
31
|
|
37
|
-
end
|
32
|
+
end
|
@@ -9,7 +9,7 @@ module DataKitten
|
|
9
9
|
private
|
10
10
|
|
11
11
|
def self.supported?(instance)
|
12
|
-
uri =
|
12
|
+
uri = instance.uri
|
13
13
|
package = uri.path.split("/").last
|
14
14
|
# If the package is a UUID - it's more than likely to be a CKAN ID
|
15
15
|
if package.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/)
|
@@ -80,8 +80,9 @@ module DataKitten
|
|
80
80
|
#
|
81
81
|
# @see Dataset#licenses
|
82
82
|
def licenses
|
83
|
-
|
84
|
-
|
83
|
+
extras = metadata["extras"] || {}
|
84
|
+
uri = metadata["license_url"] || extras["licence_url"]
|
85
|
+
name = metadata["license_title"] || extras["licence_url_title"]
|
85
86
|
[
|
86
87
|
License.new(:id => metadata["license_id"],
|
87
88
|
:uri => uri,
|
@@ -17,7 +17,7 @@ module DataKitten
|
|
17
17
|
datapackage = DataPackage::Package.new( JSON.parse( metadata ) )
|
18
18
|
return datapackage.datapackage_version != nil
|
19
19
|
else
|
20
|
-
datapackage = DataPackage::Package.new( instance.
|
20
|
+
datapackage = DataPackage::Package.new( instance.url )
|
21
21
|
return datapackage.datapackage_version != nil
|
22
22
|
end
|
23
23
|
rescue => e
|
@@ -157,7 +157,7 @@ module DataKitten
|
|
157
157
|
metadata = load_file("datapackage.json")
|
158
158
|
@datapackage = DataPackage::Package.new( JSON.parse( metadata ) )
|
159
159
|
else
|
160
|
-
@datapackage = DataPackage::Package.new(
|
160
|
+
@datapackage = DataPackage::Package.new( url )
|
161
161
|
end
|
162
162
|
end
|
163
163
|
@datapackage
|
@@ -166,4 +166,4 @@ module DataKitten
|
|
166
166
|
|
167
167
|
end
|
168
168
|
|
169
|
-
end
|
169
|
+
end
|
@@ -26,7 +26,7 @@ module DataKitten
|
|
26
26
|
#Supports content negotiation for various RDF serializations. Attempts "dataset autodiscovery" if it receives
|
27
27
|
#an HTML response. This leaves the RDFa Publishing Format to just parse RDFa responses
|
28
28
|
def self.create_graph(uri)
|
29
|
-
|
29
|
+
|
30
30
|
resp = RestClient.get uri,
|
31
31
|
:accept=>ACCEPT_HEADER
|
32
32
|
return false if resp.code != 200
|
@@ -55,15 +55,13 @@ module DataKitten
|
|
55
55
|
graph << reader.new( StringIO.new( resp.body ))
|
56
56
|
|
57
57
|
return graph
|
58
|
-
rescue
|
59
|
-
#puts e
|
60
|
-
#puts e.backtrace
|
58
|
+
rescue
|
61
59
|
nil
|
62
60
|
end
|
63
61
|
|
64
62
|
#Can we create an RDF graph for this object containing the description of a dataset?
|
65
63
|
def self.supported?(instance)
|
66
|
-
graph = create_graph(instance.
|
64
|
+
graph = create_graph(instance.url)
|
67
65
|
return false unless graph
|
68
66
|
return true if first_of_type(graph,
|
69
67
|
[RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset,
|
@@ -80,23 +78,16 @@ module DataKitten
|
|
80
78
|
:rdf
|
81
79
|
end
|
82
80
|
|
83
|
-
def uri
|
84
|
-
access_url
|
85
|
-
end
|
86
|
-
|
87
81
|
private
|
88
82
|
|
89
83
|
def dataset_uri
|
90
|
-
|
84
|
+
url
|
91
85
|
end
|
92
86
|
|
93
87
|
def graph
|
94
|
-
|
95
|
-
@graph = LinkedData.create_graph(access_url)
|
96
|
-
end
|
97
|
-
@graph
|
88
|
+
@graph ||= LinkedData.create_graph(dataset_uri)
|
98
89
|
end
|
99
90
|
|
100
91
|
end
|
101
92
|
end
|
102
|
-
end
|
93
|
+
end
|
data/lib/data_kitten/version.rb
CHANGED
data/lib/data_kitten.rb
CHANGED
@@ -19,6 +19,7 @@ require 'data_kitten/temporal'
|
|
19
19
|
require 'data_kitten/dataset'
|
20
20
|
require 'data_kitten/distribution_format'
|
21
21
|
require 'data_kitten/distribution'
|
22
|
+
require 'data_kitten/fetcher'
|
22
23
|
|
23
24
|
# A collection of classes that represent Datasets and other concepts, modeled on {http://www.w3.org/TR/vocab-dcat/ DCAT}.
|
24
25
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kitten
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Smith
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2015-04-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -195,6 +195,7 @@ files:
|
|
195
195
|
- lib/data_kitten/dataset.rb
|
196
196
|
- lib/data_kitten/distribution.rb
|
197
197
|
- lib/data_kitten/distribution_format.rb
|
198
|
+
- lib/data_kitten/fetcher.rb
|
198
199
|
- lib/data_kitten/hosts.rb
|
199
200
|
- lib/data_kitten/hosts/bitbucket.rb
|
200
201
|
- lib/data_kitten/hosts/gist.rb
|
@@ -234,7 +235,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
234
235
|
version: '0'
|
235
236
|
requirements: []
|
236
237
|
rubyforge_project:
|
237
|
-
rubygems_version: 2.
|
238
|
+
rubygems_version: 2.4.5
|
238
239
|
signing_key:
|
239
240
|
specification_version: 4
|
240
241
|
summary: Get dataset metadata in a consistent format - no matter what you throw at
|