data_kitten 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OWUwYzRlMGU1MWNlYzNmZDNkZDIzYWQyOWZmZjAzNjcxZjVmMTVlZg==
4
+ Mjc1ZmZmMmFkOTcxZGYwMzI3ZTRiMzJlODU4ZmVlZDAxNTE0MzUwMA==
5
5
  data.tar.gz: !binary |-
6
- M2U2M2U4Njg2MGQwYWUyMzdkMzk3YmNmMWJiMGNiNWMzODhkMTE2Nw==
6
+ MDU3Yzc5YTU4OGQwY2Y5OWI2YThkODUzYzUyOGZlNWY5MTZmNDEyNA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ZmJhNmJiMmFkNDhkZjI1NmI4NDYyOGEzMDkxNjBjMzcxYTY0OGMyZWY5NWYw
10
- NTUzOTI3N2FlYmU5NzQ4ZTZjN2NiNTlhMWU4NjQ3NWRhMmYwOTE3ZTAwZGE0
11
- ODNiYzM4NjYzOTdmMDVmMGFjNWQ4ZTQ4ZGVkZTFiOTAyNWNlMGY=
9
+ MDM4ZmFmMDkxZmU4ZDQ5ZWY4YzdkODg5YzJhNjNjZjMzNTAxM2ExYmIyOTA4
10
+ NTYyYmI0ZmJlNWFlYTI2OTk2MmE2YmY4MTg2MTkzOTliMGFiYjk3N2ZkYTE3
11
+ ODIwZTM1MmI2OGU2ODcwODlhN2U1MmQxOTRkY2Q2YzZkNTNmZWU=
12
12
  data.tar.gz: !binary |-
13
- YzQ2YTM4YzllNDRhYzQzZjhkYWVmNjY0MThlMTZiZTg0ZjIwNWZlZDRiNmM5
14
- ZmM5MWM4NDc4ZDViMDAwOWFhM2JjYTU1ZTI0YWVhNDQyYzMzZDJlYjY1MDkw
15
- YzA1M2U1MmVlN2FlMmE4OGRkODAwYTA1Y2RhNTNhMGZlZDdiODk=
13
+ M2MyYTYwZGE0ZTBkYzNhYWYwM2I5NjcxYjI1ODA4ZDlhYTgxZDFhZGYwNzAx
14
+ NTU0MDQ5MjM2ZDViYzNkNmQzODk5NzEzYjZiMzNlZmEzN2YwZTM5MGM4NzYx
15
+ OGQ2NTExNTc4ZWFmZWI2OWFlN2EyNzRiOWMwN2YzMTkzMDZlMTg=
@@ -28,8 +28,6 @@ module DataKitten
28
28
  # @!attribute access_url
29
29
  # @return [String] the URL that gives access to the dataset
30
30
  attr_accessor :access_url
31
- alias_method :uri, :access_url
32
- alias_method :url, :access_url
33
31
 
34
32
  # Create a new Dataset object
35
33
  #
@@ -38,11 +36,19 @@ module DataKitten
38
36
  # The class will attempt to auto-load metadata from this URL.
39
37
  #
40
38
  def initialize(options)
41
- @access_url = options[:access_url]
39
+ @access_url = DataKitten::Fetcher.wrap(options[:access_url])
42
40
  detect_origin
43
41
  detect_host
44
42
  detect_publishing_format
45
43
  end
44
+
45
+ def uri
46
+ URI(@access_url.to_s)
47
+ end
48
+
49
+ def url
50
+ @access_url.to_s
51
+ end
46
52
 
47
53
  # Can metadata be loaded for this Dataset?
48
54
  #
@@ -224,4 +230,4 @@ module DataKitten
224
230
  end
225
231
 
226
232
  end
227
- end
233
+ end
@@ -0,0 +1,70 @@
module DataKitten

  # Wraps a URL and lazily performs a single HTTP GET against it.
  #
  # The request is issued the first time any response-derived value
  # (status code, body, content type, ...) is asked for, and its outcome is
  # remembered, so asking several questions about the same resource does not
  # trigger several requests.
  #
  # Failures never escape from the query methods: an HTTP error response is
  # recorded through {#code}, and any other failure while fetching simply
  # leaves the fetcher without a response (every query then answers
  # nil/false).
  class Fetcher

    # @!attribute [r] url
    #   @return [Object] the value this fetcher was created with (normally a URL string)
    attr_reader :url

    # Returns the argument untouched when it already is a Fetcher, otherwise
    # builds a new Fetcher around it.
    #
    # @param url_or_fetcher [Fetcher, Object] an existing fetcher or a URL
    # @return [Fetcher]
    def self.wrap(url_or_fetcher)
      return url_or_fetcher if url_or_fetcher.is_a?(self)

      new(url_or_fetcher)
    end

    # @param url [Object] the URL to fetch; no request is made at construction time
    def initialize(url)
      @url = url
    end

    # @return [Boolean] true when the request completed with status 200
    def ok?
      code == 200
    end

    # @return [Integer, nil] the status code of the response, the status code
    #   carried by an HTTP error, or nil when no status could be obtained
    def code
      response ? response.code : @code
    end

    # @return [String, nil] the body of the response, or nil when the request
    #   did not produce a successful response
    def body
      response.body if response
    end

    # @return [Object, nil] the body parsed as JSON, or nil when there is no
    #   response or the body is not valid JSON
    def as_json
      JSON.parse(body) if response
    rescue JSON::ParserError
      nil
    end

    # @return [String, nil] the raw Content-Type header of the response
    def content_type
      response.headers[:content_type] if response
    end

    # The media type portion of the Content-Type header, i.e. everything
    # before the first ';', with surrounding whitespace removed so that it can
    # be compared against exact media type names.
    #
    # @return [String, nil]
    def content_type_format
      header = content_type
      return unless header

      media_type = header.split(';').first
      media_type.strip if media_type
    end

    # @return [String] the wrapped URL as a string
    def to_s
      url.to_s
    end

    # @return [Boolean] true when the response declares a text/html media type
    def html?
      # \A anchors at the start of the string; ^ would also match after a newline.
      !!(content_type_format =~ %r{\Atext/html}i)
    end

    private

    # Performs the GET request once and memoizes the result.
    #
    # @return [Object, nil] the RestClient response, or nil when the request failed
    def response
      unless @requested
        # Set before the request so a failure is never retried.
        @requested = true
        begin
          @response = RestClient.get(url)
        rescue RestClient::ExceptionWithResponse => error
          @error = error.response
          # The exception may carry no response at all (e.g. a timeout).
          @code = @error.code if @error
        rescue StandardError
          # Connection-level problems (refused connection, DNS failure,
          # malformed URL, ...) are treated as "no response" rather than
          # aborting format/host detection.
          @response = nil
        end
      end
      @response
    end
  end

end
@@ -14,10 +14,11 @@ module DataKitten
14
14
  DataKitten::Hosts::Bitbucket,
15
15
  DataKitten::Hosts::Gist
16
16
  ].each do |host|
17
- extend host if host.supported?(@access_url)
17
+ extend host if host.supported?(url)
18
+ break
18
19
  end
19
20
  end
20
21
 
21
22
  end
22
23
 
23
- end
24
+ end
@@ -10,8 +10,8 @@ module DataKitten
10
10
 
11
11
  private
12
12
 
13
- def self.supported?(uri)
14
- uri =~ /\A(git|https?):\/\/.*\.git\Z/
13
+ def self.supported?(resource)
14
+ resource.to_s =~ /\A(git|https?):\/\/.*\.git\Z/
15
15
  end
16
16
 
17
17
  public
@@ -63,4 +63,4 @@ module DataKitten
63
63
 
64
64
  end
65
65
 
66
- end
66
+ end
@@ -10,10 +10,8 @@ module DataKitten
10
10
 
11
11
  private
12
12
 
13
- def self.supported?(uri)
14
- RestClient.get(uri).headers[:content_type] =~ /text\/html/
15
- rescue
16
- false
13
+ def self.supported?(resource)
14
+ resource.html?
17
15
  end
18
16
 
19
17
  public
@@ -29,4 +27,4 @@ module DataKitten
29
27
 
30
28
  end
31
29
 
32
- end
30
+ end
@@ -10,15 +10,10 @@ module DataKitten
10
10
 
11
11
  private
12
12
 
13
- def self.supported?(uri)
14
- content_type = RestClient.head(uri).headers[:content_type]
15
- return nil unless content_type
16
-
17
- return RDF::Format.content_types.keys.include?(
18
- content_type.split(";").first )
19
-
20
- rescue
21
- false
13
+ def self.supported?(resource)
14
+ if type = resource.content_type_format
15
+ RDF::Format.content_types.keys.include?(type)
16
+ end
22
17
  end
23
18
 
24
19
  public
@@ -34,4 +29,4 @@ module DataKitten
34
29
 
35
30
  end
36
31
 
37
- end
32
+ end
@@ -9,7 +9,7 @@ module DataKitten
9
9
  private
10
10
 
11
11
  def self.supported?(instance)
12
- uri = URI(instance.uri)
12
+ uri = instance.uri
13
13
  package = uri.path.split("/").last
14
14
  # If the package is a UUID - it's more than likely to be a CKAN ID
15
15
  if package.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/)
@@ -80,8 +80,9 @@ module DataKitten
80
80
  #
81
81
  # @see Dataset#licenses
82
82
  def licenses
83
- uri = metadata["license_url"] || metadata["extras"]["licence_url"] rescue nil
84
- name = metadata["license_title"] || metadata["extras"]["licence_url_title"] rescue nil
83
+ extras = metadata["extras"] || {}
84
+ uri = metadata["license_url"] || extras["licence_url"]
85
+ name = metadata["license_title"] || extras["licence_url_title"]
85
86
  [
86
87
  License.new(:id => metadata["license_id"],
87
88
  :uri => uri,
@@ -17,7 +17,7 @@ module DataKitten
17
17
  datapackage = DataPackage::Package.new( JSON.parse( metadata ) )
18
18
  return datapackage.datapackage_version != nil
19
19
  else
20
- datapackage = DataPackage::Package.new( instance.uri )
20
+ datapackage = DataPackage::Package.new( instance.url )
21
21
  return datapackage.datapackage_version != nil
22
22
  end
23
23
  rescue => e
@@ -157,7 +157,7 @@ module DataKitten
157
157
  metadata = load_file("datapackage.json")
158
158
  @datapackage = DataPackage::Package.new( JSON.parse( metadata ) )
159
159
  else
160
- @datapackage = DataPackage::Package.new( access_url )
160
+ @datapackage = DataPackage::Package.new( url )
161
161
  end
162
162
  end
163
163
  @datapackage
@@ -166,4 +166,4 @@ module DataKitten
166
166
 
167
167
  end
168
168
 
169
- end
169
+ end
@@ -26,7 +26,7 @@ module DataKitten
26
26
  #Supports content negotiation for various RDF serializations. Attempts "dataset autodiscovery" if it receives
27
27
  #an HTML response. This leaves the RDFa Publishing Format to just parse RDFa responses
28
28
  def self.create_graph(uri)
29
-
29
+
30
30
  resp = RestClient.get uri,
31
31
  :accept=>ACCEPT_HEADER
32
32
  return false if resp.code != 200
@@ -55,15 +55,13 @@ module DataKitten
55
55
  graph << reader.new( StringIO.new( resp.body ))
56
56
 
57
57
  return graph
58
- rescue => e
59
- #puts e
60
- #puts e.backtrace
58
+ rescue
61
59
  nil
62
60
  end
63
61
 
64
62
  #Can we create an RDF graph for this object containing the description of a dataset?
65
63
  def self.supported?(instance)
66
- graph = create_graph(instance.uri)
64
+ graph = create_graph(instance.url)
67
65
  return false unless graph
68
66
  return true if first_of_type(graph,
69
67
  [RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset,
@@ -80,23 +78,16 @@ module DataKitten
80
78
  :rdf
81
79
  end
82
80
 
83
- def uri
84
- access_url
85
- end
86
-
87
81
  private
88
82
 
89
83
  def dataset_uri
90
- access_url
84
+ url
91
85
  end
92
86
 
93
87
  def graph
94
- if !@graph
95
- @graph = LinkedData.create_graph(access_url)
96
- end
97
- @graph
88
+ @graph ||= LinkedData.create_graph(dataset_uri)
98
89
  end
99
90
 
100
91
  end
101
92
  end
102
- end
93
+ end
@@ -1,3 +1,3 @@
1
1
  module DataKitten
2
- VERSION = "1.0.2"
2
+ VERSION = "1.1.0"
3
3
  end
data/lib/data_kitten.rb CHANGED
@@ -19,6 +19,7 @@ require 'data_kitten/temporal'
19
19
  require 'data_kitten/dataset'
20
20
  require 'data_kitten/distribution_format'
21
21
  require 'data_kitten/distribution'
22
+ require 'data_kitten/fetcher'
22
23
 
23
24
  # A collection of classes that represent Datasets and other concepts, modeled on {http://www.w3.org/TR/vocab-dcat/ DCAT}.
24
25
  #
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kitten
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Smith
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-09-17 00:00:00.000000000 Z
12
+ date: 2015-04-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -195,6 +195,7 @@ files:
195
195
  - lib/data_kitten/dataset.rb
196
196
  - lib/data_kitten/distribution.rb
197
197
  - lib/data_kitten/distribution_format.rb
198
+ - lib/data_kitten/fetcher.rb
198
199
  - lib/data_kitten/hosts.rb
199
200
  - lib/data_kitten/hosts/bitbucket.rb
200
201
  - lib/data_kitten/hosts/gist.rb
@@ -234,7 +235,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
234
235
  version: '0'
235
236
  requirements: []
236
237
  rubyforge_project:
237
- rubygems_version: 2.3.0
238
+ rubygems_version: 2.4.5
238
239
  signing_key:
239
240
  specification_version: 4
240
241
  summary: Get dataset metadata in a consistent format - no matter what you throw at