data_kitten 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OWUwYzRlMGU1MWNlYzNmZDNkZDIzYWQyOWZmZjAzNjcxZjVmMTVlZg==
4
+ Mjc1ZmZmMmFkOTcxZGYwMzI3ZTRiMzJlODU4ZmVlZDAxNTE0MzUwMA==
5
5
  data.tar.gz: !binary |-
6
- M2U2M2U4Njg2MGQwYWUyMzdkMzk3YmNmMWJiMGNiNWMzODhkMTE2Nw==
6
+ MDU3Yzc5YTU4OGQwY2Y5OWI2YThkODUzYzUyOGZlNWY5MTZmNDEyNA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ZmJhNmJiMmFkNDhkZjI1NmI4NDYyOGEzMDkxNjBjMzcxYTY0OGMyZWY5NWYw
10
- NTUzOTI3N2FlYmU5NzQ4ZTZjN2NiNTlhMWU4NjQ3NWRhMmYwOTE3ZTAwZGE0
11
- ODNiYzM4NjYzOTdmMDVmMGFjNWQ4ZTQ4ZGVkZTFiOTAyNWNlMGY=
9
+ MDM4ZmFmMDkxZmU4ZDQ5ZWY4YzdkODg5YzJhNjNjZjMzNTAxM2ExYmIyOTA4
10
+ NTYyYmI0ZmJlNWFlYTI2OTk2MmE2YmY4MTg2MTkzOTliMGFiYjk3N2ZkYTE3
11
+ ODIwZTM1MmI2OGU2ODcwODlhN2U1MmQxOTRkY2Q2YzZkNTNmZWU=
12
12
  data.tar.gz: !binary |-
13
- YzQ2YTM4YzllNDRhYzQzZjhkYWVmNjY0MThlMTZiZTg0ZjIwNWZlZDRiNmM5
14
- ZmM5MWM4NDc4ZDViMDAwOWFhM2JjYTU1ZTI0YWVhNDQyYzMzZDJlYjY1MDkw
15
- YzA1M2U1MmVlN2FlMmE4OGRkODAwYTA1Y2RhNTNhMGZlZDdiODk=
13
+ M2MyYTYwZGE0ZTBkYzNhYWYwM2I5NjcxYjI1ODA4ZDlhYTgxZDFhZGYwNzAx
14
+ NTU0MDQ5MjM2ZDViYzNkNmQzODk5NzEzYjZiMzNlZmEzN2YwZTM5MGM4NzYx
15
+ OGQ2NTExNTc4ZWFmZWI2OWFlN2EyNzRiOWMwN2YzMTkzMDZlMTg=
@@ -28,8 +28,6 @@ module DataKitten
28
28
  # @!attribute access_url
29
29
  # @return [String] the URL that gives access to the dataset
30
30
  attr_accessor :access_url
31
- alias_method :uri, :access_url
32
- alias_method :url, :access_url
33
31
 
34
32
  # Create a new Dataset object
35
33
  #
@@ -38,11 +36,19 @@ module DataKitten
38
36
  # The class will attempt to auto-load metadata from this URL.
39
37
  #
40
38
  def initialize(options)
41
- @access_url = options[:access_url]
39
+ @access_url = DataKitten::Fetcher.wrap(options[:access_url])
42
40
  detect_origin
43
41
  detect_host
44
42
  detect_publishing_format
45
43
  end
44
+
45
+ def uri
46
+ URI(@access_url.to_s)
47
+ end
48
+
49
+ def url
50
+ @access_url.to_s
51
+ end
46
52
 
47
53
  # Can metadata be loaded for this Dataset?
48
54
  #
@@ -224,4 +230,4 @@ module DataKitten
224
230
  end
225
231
 
226
232
  end
227
- end
233
+ end
@@ -0,0 +1,70 @@
1
+ module DataKitten
2
+
3
+ class Fetcher
4
+
5
+ attr_reader :url
6
+
7
+ def self.wrap(url_or_fetcher)
8
+ if url_or_fetcher.is_a?(self)
9
+ url_or_fetcher
10
+ else
11
+ new(url_or_fetcher)
12
+ end
13
+ end
14
+
15
+ def initialize(url)
16
+ @url = url
17
+ end
18
+
19
+ def ok?
20
+ code == 200
21
+ end
22
+
23
+ def code
24
+ response ? response.code : @code
25
+ end
26
+
27
+ def body
28
+ response if response
29
+ end
30
+
31
+ def as_json
32
+ JSON.parse(body) if response
33
+ rescue JSON::ParserError
34
+ nil
35
+ end
36
+
37
+ def content_type
38
+ response.headers[:content_type] if response
39
+ end
40
+
41
+ def content_type_format
42
+ if val = content_type
43
+ val.split(';').first
44
+ end
45
+ end
46
+
47
+ def to_s
48
+ url.to_s
49
+ end
50
+
51
+ def html?
52
+ !!(content_type_format =~ %r{^text/html}i)
53
+ end
54
+
55
+ private
56
+ def response
57
+ unless @requested
58
+ @requested = true
59
+ begin
60
+ @response = RestClient.get(url)
61
+ rescue RestClient::ExceptionWithResponse => error
62
+ @error = error.response
63
+ @code = @error.code
64
+ end
65
+ end
66
+ @response
67
+ end
68
+ end
69
+
70
+ end
@@ -14,10 +14,11 @@ module DataKitten
14
14
  DataKitten::Hosts::Bitbucket,
15
15
  DataKitten::Hosts::Gist
16
16
  ].each do |host|
17
- extend host if host.supported?(@access_url)
17
+ extend host if host.supported?(url)
18
+ break
18
19
  end
19
20
  end
20
21
 
21
22
  end
22
23
 
23
- end
24
+ end
@@ -10,8 +10,8 @@ module DataKitten
10
10
 
11
11
  private
12
12
 
13
- def self.supported?(uri)
14
- uri =~ /\A(git|https?):\/\/.*\.git\Z/
13
+ def self.supported?(resource)
14
+ resource.to_s =~ /\A(git|https?):\/\/.*\.git\Z/
15
15
  end
16
16
 
17
17
  public
@@ -63,4 +63,4 @@ module DataKitten
63
63
 
64
64
  end
65
65
 
66
- end
66
+ end
@@ -10,10 +10,8 @@ module DataKitten
10
10
 
11
11
  private
12
12
 
13
- def self.supported?(uri)
14
- RestClient.get(uri).headers[:content_type] =~ /text\/html/
15
- rescue
16
- false
13
+ def self.supported?(resource)
14
+ resource.html?
17
15
  end
18
16
 
19
17
  public
@@ -29,4 +27,4 @@ module DataKitten
29
27
 
30
28
  end
31
29
 
32
- end
30
+ end
@@ -10,15 +10,10 @@ module DataKitten
10
10
 
11
11
  private
12
12
 
13
- def self.supported?(uri)
14
- content_type = RestClient.head(uri).headers[:content_type]
15
- return nil unless content_type
16
-
17
- return RDF::Format.content_types.keys.include?(
18
- content_type.split(";").first )
19
-
20
- rescue
21
- false
13
+ def self.supported?(resource)
14
+ if type = resource.content_type_format
15
+ RDF::Format.content_types.keys.include?(type)
16
+ end
22
17
  end
23
18
 
24
19
  public
@@ -34,4 +29,4 @@ module DataKitten
34
29
 
35
30
  end
36
31
 
37
- end
32
+ end
@@ -9,7 +9,7 @@ module DataKitten
9
9
  private
10
10
 
11
11
  def self.supported?(instance)
12
- uri = URI(instance.uri)
12
+ uri = instance.uri
13
13
  package = uri.path.split("/").last
14
14
  # If the package is a UUID - it's more than likely to be a CKAN ID
15
15
  if package.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/)
@@ -80,8 +80,9 @@ module DataKitten
80
80
  #
81
81
  # @see Dataset#licenses
82
82
  def licenses
83
- uri = metadata["license_url"] || metadata["extras"]["licence_url"] rescue nil
84
- name = metadata["license_title"] || metadata["extras"]["licence_url_title"] rescue nil
83
+ extras = metadata["extras"] || {}
84
+ uri = metadata["license_url"] || extras["licence_url"]
85
+ name = metadata["license_title"] || extras["licence_url_title"]
85
86
  [
86
87
  License.new(:id => metadata["license_id"],
87
88
  :uri => uri,
@@ -17,7 +17,7 @@ module DataKitten
17
17
  datapackage = DataPackage::Package.new( JSON.parse( metadata ) )
18
18
  return datapackage.datapackage_version != nil
19
19
  else
20
- datapackage = DataPackage::Package.new( instance.uri )
20
+ datapackage = DataPackage::Package.new( instance.url )
21
21
  return datapackage.datapackage_version != nil
22
22
  end
23
23
  rescue => e
@@ -157,7 +157,7 @@ module DataKitten
157
157
  metadata = load_file("datapackage.json")
158
158
  @datapackage = DataPackage::Package.new( JSON.parse( metadata ) )
159
159
  else
160
- @datapackage = DataPackage::Package.new( access_url )
160
+ @datapackage = DataPackage::Package.new( url )
161
161
  end
162
162
  end
163
163
  @datapackage
@@ -166,4 +166,4 @@ module DataKitten
166
166
 
167
167
  end
168
168
 
169
- end
169
+ end
@@ -26,7 +26,7 @@ module DataKitten
26
26
  #Supports content negotiation for various RDF serializations. Attempts "dataset autodiscovery" if it receives
27
27
  #an HTML response. This leaves the RDFa Publishing Format to just parse RDFa responses
28
28
  def self.create_graph(uri)
29
-
29
+
30
30
  resp = RestClient.get uri,
31
31
  :accept=>ACCEPT_HEADER
32
32
  return false if resp.code != 200
@@ -55,15 +55,13 @@ module DataKitten
55
55
  graph << reader.new( StringIO.new( resp.body ))
56
56
 
57
57
  return graph
58
- rescue => e
59
- #puts e
60
- #puts e.backtrace
58
+ rescue
61
59
  nil
62
60
  end
63
61
 
64
62
  #Can we create an RDF graph for this object containing the description of a dataset?
65
63
  def self.supported?(instance)
66
- graph = create_graph(instance.uri)
64
+ graph = create_graph(instance.url)
67
65
  return false unless graph
68
66
  return true if first_of_type(graph,
69
67
  [RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset,
@@ -80,23 +78,16 @@ module DataKitten
80
78
  :rdf
81
79
  end
82
80
 
83
- def uri
84
- access_url
85
- end
86
-
87
81
  private
88
82
 
89
83
  def dataset_uri
90
- access_url
84
+ url
91
85
  end
92
86
 
93
87
  def graph
94
- if !@graph
95
- @graph = LinkedData.create_graph(access_url)
96
- end
97
- @graph
88
+ @graph ||= LinkedData.create_graph(dataset_uri)
98
89
  end
99
90
 
100
91
  end
101
92
  end
102
- end
93
+ end
@@ -1,3 +1,3 @@
1
1
  module DataKitten
2
- VERSION = "1.0.2"
2
+ VERSION = "1.1.0"
3
3
  end
data/lib/data_kitten.rb CHANGED
@@ -19,6 +19,7 @@ require 'data_kitten/temporal'
19
19
  require 'data_kitten/dataset'
20
20
  require 'data_kitten/distribution_format'
21
21
  require 'data_kitten/distribution'
22
+ require 'data_kitten/fetcher'
22
23
 
23
24
  # A collection of classes that represent Datasets and other concepts, modeled on {http://www.w3.org/TR/vocab-dcat/ DCAT}.
24
25
  #
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kitten
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Smith
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-09-17 00:00:00.000000000 Z
12
+ date: 2015-04-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -195,6 +195,7 @@ files:
195
195
  - lib/data_kitten/dataset.rb
196
196
  - lib/data_kitten/distribution.rb
197
197
  - lib/data_kitten/distribution_format.rb
198
+ - lib/data_kitten/fetcher.rb
198
199
  - lib/data_kitten/hosts.rb
199
200
  - lib/data_kitten/hosts/bitbucket.rb
200
201
  - lib/data_kitten/hosts/gist.rb
@@ -234,7 +235,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
234
235
  version: '0'
235
236
  requirements: []
236
237
  rubyforge_project:
237
- rubygems_version: 2.3.0
238
+ rubygems_version: 2.4.5
238
239
  signing_key:
239
240
  specification_version: 4
240
241
  summary: Get dataset metadata in a consistent format - no matter what you throw at