data_kitten 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/data_kitten/distribution.rb +31 -25
- data/lib/data_kitten/distribution_format.rb +10 -11
- data/lib/data_kitten/publishing_formats/ckan.rb +1 -1
- data/lib/data_kitten/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OWUwYzRlMGU1MWNlYzNmZDNkZDIzYWQyOWZmZjAzNjcxZjVmMTVlZg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
M2U2M2U4Njg2MGQwYWUyMzdkMzk3YmNmMWJiMGNiNWMzODhkMTE2Nw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZmJhNmJiMmFkNDhkZjI1NmI4NDYyOGEzMDkxNjBjMzcxYTY0OGMyZWY5NWYw
|
10
|
+
NTUzOTI3N2FlYmU5NzQ4ZTZjN2NiNTlhMWU4NjQ3NWRhMmYwOTE3ZTAwZGE0
|
11
|
+
ODNiYzM4NjYzOTdmMDVmMGFjNWQ4ZTQ4ZGVkZTFiOTAyNWNlMGY=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YzQ2YTM4YzllNDRhYzQzZjhkYWVmNjY0MThlMTZiZTg0ZjIwNWZlZDRiNmM5
|
14
|
+
ZmM5MWM4NDc4ZDViMDAwOWFhM2JjYTU1ZTI0YWVhNDQyYzMzZDJlYjY1MDkw
|
15
|
+
YzA1M2U1MmVlN2FlMmE4OGRkODAwYTA1Y2RhNTNhMGZlZDdiODk=
|
@@ -2,11 +2,11 @@ module DataKitten
|
|
2
2
|
|
3
3
|
# A specific available form of a dataset, such as a CSV file, an API, or an RSS feed.
|
4
4
|
#
|
5
|
-
# Based on {http://www.w3.org/TR/vocab-dcat/#class-distribution dcat:Distribution}, but
|
5
|
+
# Based on {http://www.w3.org/TR/vocab-dcat/#class-distribution dcat:Distribution}, but
|
6
6
|
# with useful aliases for other vocabularies.
|
7
7
|
#
|
8
8
|
class Distribution
|
9
|
-
|
9
|
+
|
10
10
|
# @!attribute format
|
11
11
|
# @return [DistributionFormat] the file format of the distribution.
|
12
12
|
attr_accessor :format
|
@@ -34,30 +34,30 @@ module DataKitten
|
|
34
34
|
# change to a more structured object later.
|
35
35
|
attr_accessor :schema
|
36
36
|
|
37
|
+
# @!attribute extension
|
38
|
+
# @return [String] the file extension of the distribution
|
39
|
+
attr_accessor :extension
|
40
|
+
|
37
41
|
# Create a new Distribution. Currently only loads from Datapackage +resource+ hashes.
|
38
42
|
#
|
39
43
|
# @param dataset [Dataset] the {Dataset} that this is a part of.
|
40
44
|
# @param options [Hash] A set of options with which to initialise the distribution.
|
41
|
-
# @option options [String] :datapackage_resource the +resource+ section of a Datapackage
|
45
|
+
# @option options [String] :datapackage_resource the +resource+ section of a Datapackage
|
42
46
|
# representation to load information from.
|
43
|
-
def initialize(dataset, options)
|
47
|
+
def initialize(dataset, options)
|
44
48
|
# Store dataset
|
45
49
|
@dataset = dataset
|
46
50
|
# Parse datapackage
|
47
51
|
if r = options[:datapackage_resource]
|
48
52
|
# Load basics
|
49
53
|
@description = r['description']
|
50
|
-
# Load HTTP Response for further use
|
51
|
-
if r['url']
|
52
|
-
@response = Curl::Easy.http_head(r['url'])
|
53
|
-
end
|
54
54
|
# Work out format
|
55
55
|
@format = begin
|
56
|
-
extension = r['format']
|
57
|
-
if extension.nil?
|
58
|
-
extension = r['path'].is_a?(String) ? r['path'].split('.').last.upcase : nil
|
56
|
+
@extension = r['format']
|
57
|
+
if @extension.nil?
|
58
|
+
@extension = r['path'].is_a?(String) ? r['path'].split('.').last.upcase : nil
|
59
59
|
end
|
60
|
-
extension ? DistributionFormat.new(extension, @response) : nil
|
60
|
+
@extension ? DistributionFormat.new(self) : nil
|
61
61
|
end
|
62
62
|
# Get CSV dialect
|
63
63
|
@dialect = r['dialect']
|
@@ -76,19 +76,14 @@ module DataKitten
|
|
76
76
|
@title = r[:title]
|
77
77
|
@description = r[:title]
|
78
78
|
@access_url = r[:accessURL]
|
79
|
+
@extension = r[:format]
|
79
80
|
# Load HTTP Response for further use
|
80
|
-
|
81
|
-
@response = Curl::Easy.http_head(@access_url) do |c|
|
82
|
-
c.follow_location = true
|
83
|
-
c.useragent = "curb"
|
84
|
-
end
|
85
|
-
end
|
86
|
-
@format = r[:format] ? DistributionFormat.new(r[:format], @response) : nil
|
81
|
+
@format = r[:format] ? DistributionFormat.new(self) : nil
|
87
82
|
end
|
88
83
|
# Set default CSV dialect
|
89
84
|
@dialect ||= {
|
90
85
|
"delimiter" => ","
|
91
|
-
}
|
86
|
+
}
|
92
87
|
end
|
93
88
|
|
94
89
|
# A usable name for the distribution, unique within the {Dataset}.
|
@@ -112,13 +107,13 @@ module DataKitten
|
|
112
107
|
end
|
113
108
|
end
|
114
109
|
end
|
115
|
-
|
110
|
+
|
116
111
|
# Whether the file that the distribution represents actually exists
|
117
112
|
#
|
118
113
|
# @return [Boolean] whether the HTTP response returns a success code or not
|
119
114
|
def exists?
|
120
115
|
if @access_url
|
121
|
-
|
116
|
+
http_head.response_code != 404
|
122
117
|
end
|
123
118
|
end
|
124
119
|
|
@@ -134,9 +129,9 @@ module DataKitten
|
|
134
129
|
end
|
135
130
|
if datafile
|
136
131
|
case format.extension
|
137
|
-
when :csv
|
132
|
+
when :csv
|
138
133
|
CSV.parse(
|
139
|
-
datafile,
|
134
|
+
datafile,
|
140
135
|
:headers => true,
|
141
136
|
:col_sep => @dialect["delimiter"]
|
142
137
|
)
|
@@ -151,6 +146,17 @@ module DataKitten
|
|
151
146
|
end
|
152
147
|
end
|
153
148
|
|
154
|
-
|
149
|
+
def http_head
|
150
|
+
if @access_url
|
151
|
+
@http_head ||= begin
|
152
|
+
Curl::Easy.http_head(@access_url) do |c|
|
153
|
+
c.follow_location = true
|
154
|
+
c.useragent = "curb"
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
end
|
155
161
|
|
156
162
|
end
|
@@ -1,23 +1,22 @@
|
|
1
1
|
module DataKitten
|
2
2
|
|
3
3
|
# A file format for a distribution
|
4
|
-
#
|
4
|
+
#
|
5
5
|
# For instance CSV, XML, etc.
|
6
6
|
#
|
7
7
|
class DistributionFormat
|
8
|
-
|
8
|
+
|
9
9
|
#@!attribute extension
|
10
10
|
#@return [Symbol] a symbol for the file extension. For instance, :csv.
|
11
11
|
attr_reader :extension
|
12
12
|
|
13
13
|
# Create a new DistributionFormat object with the relevant extension
|
14
14
|
#
|
15
|
-
# @param
|
16
|
-
def initialize(extension, response)
|
15
|
+
# @param distribution [Distribution] the distribution for the format
|
16
|
+
def initialize(distribution)
|
17
|
+
@distribution = distribution
|
17
18
|
# Store extension as a lowercase symbol
|
18
|
-
@extension = extension.to_s.downcase.to_sym
|
19
|
-
# Store response for later use
|
20
|
-
@response = response
|
19
|
+
@extension = distribution.extension.to_s.downcase.to_sym
|
21
20
|
# Set up format lists
|
22
21
|
@@formats ||= {
|
23
22
|
csv: { structured: true, open: true },
|
@@ -37,7 +36,7 @@ module DataKitten
|
|
37
36
|
shp: { structured: true, open: true },
|
38
37
|
html: { structured: false, open: true },
|
39
38
|
doc: { structured: false, open: false },
|
40
|
-
pdf: { structured: false, open: true },
|
39
|
+
pdf: { structured: false, open: true },
|
41
40
|
}
|
42
41
|
end
|
43
42
|
|
@@ -54,7 +53,7 @@ module DataKitten
|
|
54
53
|
def open?
|
55
54
|
@@formats[@extension][:open] rescue nil
|
56
55
|
end
|
57
|
-
|
56
|
+
|
58
57
|
# Whether the format of the file matches the extension given by the data
|
59
58
|
#
|
60
59
|
# @return [Boolean] whether the MIME type given in the HTTP response matches the data or not
|
@@ -62,12 +61,12 @@ module DataKitten
|
|
62
61
|
begin
|
63
62
|
mimes = []
|
64
63
|
MIME::Types.type_for(@extension.to_s).each { |i| mimes << i.content_type }
|
65
|
-
!!(@response.content_type =~ /#{mimes.join('|')}/) || false
|
64
|
+
!!(@distribution.http_head.content_type =~ /#{mimes.join('|')}/) || false
|
66
65
|
rescue
|
67
66
|
nil
|
68
67
|
end
|
69
68
|
end
|
70
69
|
|
71
|
-
end
|
70
|
+
end
|
72
71
|
|
73
72
|
end
|
@@ -171,7 +171,7 @@ module DataKitten
|
|
171
171
|
|
172
172
|
[
|
173
173
|
Agent.new(
|
174
|
-
:name => @group["display_name"] || @group["result"]["
|
174
|
+
:name => @group["display_name"] || @group["result"]["title"],
|
175
175
|
:homepage => select_extras(@group, "website-url"),
|
176
176
|
:mbox => select_extras(@group, "contact-email")
|
177
177
|
)
|
data/lib/data_kitten/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kitten
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Smith
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-09-
|
12
|
+
date: 2014-09-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|