data_kitten 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
require "uri"

module DataKitten

  module PublishingFormats

    # Publishing format for datasets whose description can be retrieved as an
    # RDF graph via content negotiation. The metadata accessors themselves are
    # inherited from {RDFa}; this module only changes how the graph is
    # obtained and which resource is treated as the dataset.
    module LinkedData

      # Accept header sent with every request made by {create_graph}.
      ACCEPT_HEADER = "text/turtle, application/n-triples, application/ld+json; q=1.0,application/rdf+xml; q=0.8, */*; q=0.5".freeze

      include RDFa

      # NOTE: the module-level helpers below are public. The original file
      # placed +private+ in front of them, but that keyword has no effect on
      # +def self.+ methods, and +create_graph+ is called with an explicit
      # receiver from the instance-level +graph+ method.

      # Find the first resource with one of the specified RDF types.
      #
      # @param graph [#first_subject] the graph to search
      # @param classes [Enumerable] RDF types to look for, in priority order
      # @return the first subject found for the earliest matching type, or
      #   +nil+ when no type matches
      def self.first_of_type(graph, classes)
        classes.each do |clazz|
          subject = graph.first_subject(RDF::Query::Pattern.new(nil, RDF.type, clazz))
          return subject if subject
        end
        nil
      end

      # Attempt to create an RDF graph for the given URI.
      #
      # Supports content negotiation for various RDF serializations. Attempts
      # "dataset autodiscovery" if it receives an HTML response: the page is
      # searched for a +link rel=alternate+ of type +application/rdf+xml+ and
      # that document is fetched instead. This leaves the RDFa publishing
      # format to just parse RDFa responses.
      #
      # @param uri [String] the URI to fetch
      # @return [RDF::Graph, false, nil] the parsed graph; +false+ when the
      #   response is unusable (non-200, HTML without an alternate link, no
      #   reader for the content); +nil+ when any error was raised
      def self.create_graph(uri)
        source = uri
        resp = RestClient.get(source, :accept => ACCEPT_HEADER)
        return false if resp.code != 200

        if resp.headers[:content_type] =~ /text\/html/
          source = alternate_rdf_url(resp.body, uri)
          return false unless source
          resp = RestClient.get(source, :accept => ACCEPT_HEADER)
          return false if resp.code != 200
        end

        reader = RDF::Reader.for(:content_type => resp.headers[:content_type])
        unless reader
          # Fall back to the extension of the document that was actually
          # fetched (the alternate link when autodiscovery was used).
          extension = File.extname(source.to_s).delete(".")
          reader = RDF::Reader.for(:file_extension => extension) unless extension.empty?
        end
        return false unless reader

        graph = RDF::Graph.new
        graph << reader.new(StringIO.new(resp.body))
        graph
      rescue
        # Best effort: any network, URI or parse failure means "no graph".
        nil
      end

      # Locate the RDF/XML alternate advertised by an HTML page.
      #
      # @param html [String] the HTML document body
      # @param base_uri [String] the URI the page was fetched from, used to
      #   resolve a relative +href+
      # @return [String, nil] the absolute URL of the alternate, or +nil+
      #   when the page does not advertise one
      def self.alternate_rdf_url(html, base_uri)
        doc = Nokogiri::HTML(html)
        link = doc.search('link[rel=alternate]').detect { |n| n[:type] == 'application/rdf+xml' }
        return nil unless link

        href = link["href"].to_s
        return nil if href.empty?

        URI.join(base_uri.to_s, href).to_s
      end
      private_class_method :alternate_rdf_url

      # Can we create an RDF graph for this object containing the description
      # of a dataset?
      #
      # @param instance [#uri] the object to test
      # @return [Boolean] +true+ when the graph contains a +dcat:Dataset+ or
      #   +void:Dataset+ resource
      def self.supported?(instance)
        graph = create_graph(instance.uri)
        return false unless graph

        dataset_types = [
          RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset,
          RDF::Vocabulary.new("http://rdfs.org/ns/void#").Dataset
        ]
        first_of_type(graph, dataset_types) ? true : false
      end

      # The publishing format for the dataset.
      # @return [Symbol] +:rdf+
      # @see Dataset#publishing_format
      def publishing_format
        :rdf
      end

      # The URI of the dataset description.
      # @return the value of +access_url+
      def uri
        access_url
      end

      private

      # The resource treated as the dataset: the access URL itself.
      def dataset_uri
        access_url
      end

      # The graph for the access URL, fetched on first use. A failed fetch
      # (falsy result) is not cached, so the next call retries.
      def graph
        @graph ||= LinkedData.create_graph(access_url)
      end

    end
  end
end
@@ -0,0 +1,239 @@
1
module DataKitten

  module PublishingFormats

    # Publishing format for datasets described with DCAT metadata embedded in
    # a page as RDFa. The host object is expected to provide +uri+.
    module RDFa

      # Does the page at +instance.uri+ contain a +dcat:Dataset+?
      #
      # NOTE: the original file placed +private+ before this method; that
      # keyword has no effect on +def self.+ methods, so it is public.
      #
      # @param instance [#uri] the object to test
      # @return [String, false] the URI of the first dataset found (truthy),
      #   or +false+ when loading fails or no dataset is present
      def self.supported?(instance)
        graph = RDF::Graph.load(instance.uri, :format => :rdfa)

        query = RDF::Query.new({
          :dataset => {
            RDF.type => RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset
          }
        })

        query.execute(graph)[0][:dataset].to_s
      rescue
        false
      end

      # The publishing format for the dataset.
      # @return [Symbol] +:rdfa+
      # @see Dataset#publishing_format
      def publishing_format
        :rdfa
      end

      # A list of maintainers. Always empty for this format.
      #
      # @see Dataset#maintainers
      def maintainers
        []
      end

      # A list of publishers.
      #
      # @return [Array<Agent>] one agent per +dc:publisher+ value; empty when
      #   there are none or the metadata cannot be read
      # @see Dataset#publishers
      def publishers
        uris = metadata[dataset_uri][RDF::DC.publisher.to_s]
        uris.map do |publisher_uri|
          Agent.new(:name => first_value(publisher_uri, RDF::FOAF.name),
                    :homepage => first_value(publisher_uri, RDF::FOAF.homepage),
                    :mbox => first_value(publisher_uri, RDF::FOAF.mbox))
        end
      rescue
        []
      end

      # The rights statement for the data.
      #
      # @return [Rights, nil] the rights statement, or +nil+ when none is
      #   present or the metadata cannot be read
      # @see Dataset#rights
      def rights
        rights_uri = metadata[dataset_uri][RDF::DC.rights.to_s][0]
        # Nothing is known about the statement beyond its URI.
        return Rights.new(:uri => rights_uri) unless metadata[rights_uri]

        # NOTE(review): this branch passes the page +uri+ rather than
        # +rights_uri+ - kept as in the original; confirm it is intended.
        Rights.new(:uri => uri,
                   :dataLicense => first_value(rights_uri, odrs.dataLicense),
                   :contentLicense => first_value(rights_uri, odrs.contentLicense),
                   :copyrightNotice => first_value(rights_uri, odrs.copyrightNotice),
                   :attributionURL => first_value(rights_uri, odrs.attributionURL),
                   :attributionText => first_value(rights_uri, odrs.attributionText),
                   :copyrightHolder => first_value(rights_uri, odrs.copyrightHolder),
                   :databaseRightHolder => first_value(rights_uri, odrs.databaseRightHolder),
                   :copyrightYear => first_value(rights_uri, odrs.copyrightYear),
                   :databaseRightYear => first_value(rights_uri, odrs.databaseRightYear),
                   :copyrightStatement => first_value(rights_uri, odrs.copyrightStatement),
                   :databaseRightStatement => first_value(rights_uri, odrs.databaseRightStatement))
      rescue
        nil
      end

      # A list of licenses.
      #
      # @return [Array<License>] one license per +dc:license+ value; empty
      #   when there are none or the metadata cannot be read
      # @see Dataset#licenses
      def licenses
        uris = metadata[dataset_uri][RDF::DC.license.to_s]
        return [] if uris.nil?

        uris.map do |license_uri|
          License.new(:uri => license_uri, :name => first_value(license_uri, RDF::DC.title))
        end
      rescue
        []
      end

      # A list of contributors. Always empty for this format.
      #
      # @see Dataset#contributors
      def contributors
        []
      end

      # A list of distributions, referred to as +resources+ by Datapackage.
      #
      # @return [Array<Distribution>] one per +dcat:distribution+ value;
      #   empty when there are none or the metadata cannot be read
      # @see Dataset#distributions
      def distributions
        uris = metadata[dataset_uri][dcat.distribution.to_s]
        uris.map do |distribution_uri|
          details = {
            :title => first_value(distribution_uri, RDF::DC.title),
            :accessURL => first_value(distribution_uri, dcat.accessURL)
          }
          Distribution.new(self, dcat_resource: details)
        end
      rescue
        []
      end

      # The human-readable title of the dataset.
      #
      # @return [String, nil]
      # @see Dataset#data_title
      def data_title
        metadata[dataset_uri][dct.title.to_s][0]
      rescue
        nil
      end

      # A brief description of the dataset.
      #
      # @return [String, nil]
      # @see Dataset#description
      def description
        metadata[dataset_uri][dct.description.to_s][0]
      rescue
        nil
      end

      # Keywords for the dataset.
      #
      # @return [Array<String>] a copy of the +dcat:keyword+ values, so
      #   callers cannot modify the cached metadata; empty when there are
      #   none or the metadata cannot be read
      # @see Dataset#keywords
      def keywords
        Array(metadata[dataset_uri][dcat.keyword.to_s]).dup
      rescue
        []
      end

      # Where the data is sourced from. Always empty for this format.
      #
      # @see Dataset#sources
      def sources
        []
      end

      # How frequently the data is updated.
      #
      # DCAT expresses frequency as +dct:accrualPeriodicity+; the
      # +dcat:accrualPeriodicity+ property this method originally read is
      # still consulted as a fallback.
      #
      # @return [String, nil]
      # @see Dataset#update_frequency
      def update_frequency
        first_value(dataset_uri, dct.accrualPeriodicity) ||
          first_value(dataset_uri, dcat.accrualPeriodicity)
      end

      # Date the dataset was issued, falling back to its creation date.
      #
      # @return [Date, nil] +nil+ when neither property is present
      # @raise [ArgumentError] if the stored value is not a parseable date
      def issued
        date = first_value(dataset_uri, RDF::DC.issued) ||
               first_value(dataset_uri, RDF::DC.created)
        Date.parse(date) if date
      end

      # Date the dataset was last modified.
      #
      # @return [Date, nil] +nil+ when the property is absent
      # @raise [ArgumentError] if the stored value is not a parseable date
      def modified
        date = first_value(dataset_uri, RDF::DC.modified)
        Date.parse(date) if date
      end

      private

      # The RDFa graph for +uri+, loaded on first use.
      def graph
        @graph ||= RDF::Graph.load(uri, :format => :rdfa)
      end

      # First value recorded for +property+ on +resource+, or +default+.
      def first_value(resource, property, default = nil)
        properties = metadata[resource]
        values = properties && properties[property.to_s]
        values ? values[0] : default
      end

      # Index of the graph as {subject => {predicate => [objects]}}, with
      # every term converted to a String and duplicate objects removed.
      #
      # Built once and cached: previously every call re-walked all triples.
      # The index is only stored after a complete pass, so a failure while
      # reading the graph is retried on the next call.
      def metadata
        return @metadata if @metadata

        index = {}
        graph.triples.each do |triple|
          subject = triple[0].to_s
          predicate = triple[1].to_s
          object = triple[2].to_s

          values = ((index[subject] ||= {})[predicate] ||= [])
          values << object unless values.include?(object)
        end

        @metadata = index
      end

      # URI (as a String) of the first +dcat:Dataset+ in the graph.
      # Raises when the graph contains no dataset.
      def dataset_uri
        query = RDF::Query.new({
          :dataset => {
            RDF.type => dcat.Dataset
          }
        })

        query.execute(graph)[0][:dataset].to_s
      end

      def dcat
        RDF::Vocabulary.new("http://www.w3.org/ns/dcat#")
      end

      def dct
        RDF::Vocabulary.new("http://purl.org/dc/terms/")
      end

      def odrs
        RDF::Vocabulary.new("http://schema.theodi.org/odrs#")
      end

      def void
        RDF::Vocabulary.new("http://rdfs.org/ns/void#")
      end

    end

  end

end
@@ -0,0 +1,80 @@
1
module DataKitten

  # A rights statement for a {Dataset} or {Distribution}
  #
  class Rights

    # @!attribute uri
    #   @return [String] the URI for the rights statement
    attr_accessor :uri

    # @!attribute dataLicense
    #   @return [String] the license for the data in the dataset.
    attr_accessor :dataLicense

    # @!attribute contentLicense
    #   @return [String] the license for the content in the dataset.
    attr_accessor :contentLicense

    # @!attribute copyrightNotice
    #   @return [String] the copyright notice for the dataset.
    attr_accessor :copyrightNotice

    # @!attribute attributionURL
    #   @return [String] the attribution URL for the dataset.
    attr_accessor :attributionURL

    # @!attribute attributionText
    #   @return [String] the attribution text for the dataset.
    attr_accessor :attributionText

    # @!attribute copyrightHolder
    #   @return [String] the URI of the organization that holds copyright for this dataset
    attr_accessor :copyrightHolder

    # @!attribute databaseRightHolder
    #   @return [String] the URI of the organization that owns the database rights for this dataset
    attr_accessor :databaseRightHolder

    # @!attribute copyrightYear
    #   @return [String] the year in which copyright is claimed
    attr_accessor :copyrightYear

    # @!attribute databaseRightYear
    #   @return [String] the year in which the database right is claimed
    attr_accessor :databaseRightYear

    # @!attribute copyrightStatement
    #   @return [String] the URL of a copyright statement for the dataset
    attr_accessor :copyrightStatement

    # @!attribute databaseRightStatement
    #   @return [String] the URL of a database right statement for the dataset
    attr_accessor :databaseRightStatement

    # Create a new Rights object. Every option is optional; attributes whose
    # option is missing are left +nil+.
    #
    # @param options [Hash] A set of options with which to initialise the rights statement.
    # @option options [String] :uri the URI for the rights statement
    # @option options [String] :dataLicense the license for the data in the dataset
    # @option options [String] :contentLicense the license for the content in the dataset
    # @option options [String] :copyrightNotice the copyright notice for the dataset
    # @option options [String] :attributionURL the attribution URL for the dataset
    # @option options [String] :attributionText attribution name for the dataset
    # @option options [String] :copyrightHolder the URI of the copyright holder
    # @option options [String] :databaseRightHolder the URI of the database right holder
    # @option options [String] :copyrightYear the year in which copyright is claimed
    # @option options [String] :databaseRightYear the year in which the database right is claimed
    # @option options [String] :copyrightStatement the URL of a copyright statement
    # @option options [String] :databaseRightStatement the URL of a database right statement
    def initialize(options = {})
      @uri = options[:uri]
      @dataLicense = options[:dataLicense]
      @contentLicense = options[:contentLicense]
      @copyrightNotice = options[:copyrightNotice]
      @attributionURL = options[:attributionURL]
      @attributionText = options[:attributionText]
      @copyrightHolder = options[:copyrightHolder]
      @databaseRightHolder = options[:databaseRightHolder]
      @copyrightYear = options[:copyrightYear]
      @databaseRightYear = options[:databaseRightYear]
      @copyrightStatement = options[:copyrightStatement]
      @databaseRightStatement = options[:databaseRightStatement]
    end

  end

end
@@ -0,0 +1,31 @@
1
module DataKitten

  # Where the data has been sourced from.
  #
  # Follows the pattern of {http://purl.org/dc/terms/source} with a
  # {http://www.w3.org/2000/01/rdf-schema#label} and a
  # {http://www.w3.org/1999/02/22-rdf-syntax-ns#resource}, and with useful
  # aliases for other vocabularies.
  class Source

    # @!attribute label
    #   @return [String] the name of the Source
    attr_accessor :label
    # Read-only alias for {#label}.
    alias_method :name, :label

    # @!attribute resource
    #   @return [String] the URI of the Source
    attr_accessor :resource
    # Read-only alias for {#resource}.
    alias_method :web, :resource

    # Create a new Source. Both options are optional; a missing option
    # leaves the attribute +nil+.
    #
    # @param [Hash] options the details of the Source.
    # @option options [String] :label The name of the Source
    # @option options [String] :resource The URI of the Source
    #
    def initialize(options = {})
      @label = options[:label]
      @resource = options[:resource]
    end

  end

end