data_kitten 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,102 @@
1
require 'uri'

module DataKitten

  module PublishingFormats

    # Publishing format for datasets whose description can be retrieved as an
    # RDF graph (Turtle, N-Triples, JSON-LD, RDF/XML, ...).
    #
    # The metadata accessors themselves come from {RDFa}; this module only
    # replaces how the graph is obtained and which resource is treated as the
    # dataset.
    module LinkedData

      # Accept header sent with every request made by {create_graph}.
      ACCEPT_HEADER = "text/turtle, application/n-triples, application/ld+json; q=1.0,application/rdf+xml; q=0.8, */*; q=0.5".freeze

      include RDFa

      # NOTE: the module-level (def self.) helpers below are public. A bare
      # `private` does not apply to singleton methods, and create_graph is
      # called with an explicit receiver from the instance-level #graph.

      # Find first resource with one of the specified RDF types
      #
      # @param graph the graph to search; must respond to +first_subject+
      # @param classes [Array] RDF classes to look for, in priority order
      # @return the first matching subject, or nil when nothing matches
      def self.first_of_type(graph, classes)
        classes.each do |clazz|
          term = graph.first_subject(RDF::Query::Pattern.new(nil, RDF.type, clazz))
          return term if term
        end
        nil
      end

      # Attempt to create an RDF graph for this object
      #
      # Supports content negotiation for various RDF serializations. Attempts
      # "dataset autodiscovery" if it receives an HTML response: the page's
      # <link rel="alternate" type="application/rdf+xml"> is followed. This
      # leaves the RDFa Publishing Format to just parse RDFa responses.
      #
      # @param uri [String] the URL to fetch
      # @return the populated graph; false when no usable RDF response or
      #   reader could be found; nil when any error was raised on the way
      def self.create_graph(uri)
        fetched = uri
        resp = RestClient.get(fetched, :accept => ACCEPT_HEADER)
        return false if resp.code != 200

        if resp.headers[:content_type] =~ /text\/html/
          fetched = alternate_rdf_url(resp.body, uri)
          return false unless fetched

          resp = RestClient.get(fetched, :accept => ACCEPT_HEADER)
          return false if resp.code != 200
        end

        reader = reader_for(resp.headers[:content_type], fetched)
        return false unless reader

        graph = RDF::Graph.new
        graph << reader.new(StringIO.new(resp.body))
        graph
      rescue
        # Best effort: network, URL and parse failures all mean "no graph".
        nil
      end

      # Locate the RDF/XML alternate advertised by an HTML page.
      #
      # The href is resolved against the page URL so that relative links work.
      #
      # @param html [String] the HTML document body
      # @param base [String] the URL the document was fetched from
      # @return [String, nil] absolute URL of the alternate, or nil if none
      def self.alternate_rdf_url(html, base)
        doc = Nokogiri::HTML(html)
        link = doc.search('link[rel=alternate]').detect { |n| n[:type] == 'application/rdf+xml' }
        return nil unless link && link["href"]

        URI.join(base.to_s, link["href"]).to_s
      end

      # Pick an RDF reader from the response content type, falling back to the
      # file extension of the URL that was actually fetched. Only the path is
      # inspected, so a query string or fragment cannot pollute the extension.
      #
      # @param content_type [String, nil] Content-Type header of the response
      # @param url [String] the URL the response came from
      # @return the reader class, or nil when none is known
      def self.reader_for(content_type, url)
        reader = RDF::Reader.for(:content_type => content_type)
        return reader if reader

        extension = File.extname(URI.parse(url.to_s).path.to_s).delete(".")
        return nil if extension.empty?

        RDF::Reader.for(:file_extension => extension)
      end

      private_class_method :alternate_rdf_url, :reader_for

      # Can we create an RDF graph for this object containing the description of a dataset?
      #
      # @param instance an object responding to +uri+
      # @return [Boolean] true when the graph contains a dcat:Dataset or void:Dataset
      def self.supported?(instance)
        graph = create_graph(instance.uri)
        return false unless graph

        dataset_types = [
          RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset,
          RDF::Vocabulary.new("http://rdfs.org/ns/void#").Dataset
        ]
        first_of_type(graph, dataset_types) ? true : false
      end

      public

      # The publishing format for the dataset.
      # @return [Symbol] +:rdf+
      # @see Dataset#publishing_format
      def publishing_format
        :rdf
      end

      # The URI the description is read from; this is the access URL.
      def uri
        access_url
      end

      private

      # The access URL is used as the dataset resource, replacing the query
      # that {RDFa} runs.
      def dataset_uri
        access_url
      end

      # The graph for the access URL. A falsy result from create_graph is not
      # cached, so a later call retries.
      def graph
        @graph ||= LinkedData.create_graph(access_url)
      end

    end
  end
end
@@ -0,0 +1,239 @@
1
module DataKitten

  module PublishingFormats

    # Publishing format for datasets described with DCAT metadata embedded in
    # a web page as RDFa.
    #
    # The including object must provide +uri+. All accessors read from
    # {#metadata}, a nested-hash index of the graph.
    module RDFa

      # ODRS properties copied onto a {Rights} object by {#rights}.
      RIGHTS_FIELDS = [
        :dataLicense, :contentLicense, :copyrightNotice,
        :attributionURL, :attributionText,
        :copyrightHolder, :databaseRightHolder,
        :copyrightYear, :databaseRightYear,
        :copyrightStatement, :databaseRightStatement
      ].freeze

      # Can a dcat:Dataset be found in the RDFa served at the instance's URI?
      #
      # @param instance an object responding to +uri+
      # @return [String, false] the dataset URI (truthy) when one is found,
      #   false when there is none or the page could not be loaded or parsed
      def self.supported?(instance)
        graph = RDF::Graph.load(instance.uri, :format => :rdfa)

        query = RDF::Query.new({
          :dataset => {
            RDF.type => RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset
          }
        })

        solution = query.execute(graph).first
        solution ? solution[:dataset].to_s : false
      rescue
        false
      end

      public

      # The publishing format for the dataset.
      # @return [Symbol] +:rdfa+
      # @see Dataset#publishing_format
      def publishing_format
        :rdfa
      end

      # A list of maintainers.
      #
      # @see Dataset#maintainers
      def maintainers
        []
      end

      # A list of publishers.
      #
      # @return [Array<Agent>] one agent per dc:publisher; empty on any failure
      # @see Dataset#publishers
      def publishers
        Array(metadata[dataset_uri][RDF::DC.publisher.to_s]).map do |publisher_uri|
          Agent.new(:name => first_value( publisher_uri, RDF::FOAF.name ),
                    :homepage => first_value( publisher_uri, RDF::FOAF.homepage ),
                    :mbox => first_value( publisher_uri, RDF::FOAF.mbox ))
        end
      rescue
        []
      end

      # The rights statement for the data
      #
      # @return [Rights, nil] nil when the dataset has no dc:rights or on any
      #   failure. When the rights resource is not described in the graph only
      #   its URI is filled in; otherwise the ODRS properties are copied too.
      # @see Dataset#rights
      def rights
        rights_uri = first_value( dataset_uri, RDF::DC.rights )
        return nil unless rights_uri
        return Rights.new(:uri => rights_uri) unless metadata[rights_uri]

        # :uri is the rights statement's own URI in both branches.
        details = RIGHTS_FIELDS.each_with_object({ :uri => rights_uri }) do |field, options|
          options[field] = first_value( rights_uri, odrs.public_send(field) )
        end
        Rights.new(details)
      rescue
        nil
      end

      # A list of licenses.
      #
      # @return [Array<License>] one license per dc:license; empty on any failure
      # @see Dataset#licenses
      def licenses
        Array(metadata[dataset_uri][RDF::DC.license.to_s]).map do |license_uri|
          License.new(:uri => license_uri, :name => first_value( license_uri, RDF::DC.title ))
        end
      rescue
        []
      end

      # A list of contributors.
      #
      # @see Dataset#contributors
      def contributors
        []
      end

      # A list of distributions, referred to as +resources+ by Datapackage.
      #
      # @return [Array<Distribution>] one per dcat:distribution; empty on any failure
      # @see Dataset#distributions
      def distributions
        Array(metadata[dataset_uri][dcat.distribution.to_s]).map do |distribution_uri|
          distribution = {
            :title => first_value( distribution_uri, RDF::DC.title ),
            :accessURL => first_value( distribution_uri, dcat.accessURL )
          }
          Distribution.new(self, dcat_resource: distribution)
        end
      rescue
        []
      end

      # The human-readable title of the dataset.
      #
      # @return [String, nil] nil when absent or on any failure
      # @see Dataset#data_title
      def data_title
        first_value( dataset_uri, dct.title )
      rescue
        nil
      end

      # A brief description of the dataset
      #
      # @return [String, nil] nil when absent or on any failure
      # @see Dataset#description
      def description
        first_value( dataset_uri, dct.description )
      rescue
        nil
      end

      # Keywords for the dataset
      #
      # @return [Array<String>] a fresh array, so callers cannot modify the
      #   metadata index; empty on any failure
      # @see Dataset#keywords
      def keywords
        Array(metadata[dataset_uri][dcat.keyword.to_s]).dup
      rescue
        []
      end

      # Where the data is sourced from
      #
      # @see Dataset#sources
      def sources
        []
      end

      # How frequently the data is updated.
      #
      # DCAT expresses this with the Dublin Core Terms property
      # accrualPeriodicity; the dcat:-namespaced lookup is kept as a fallback
      # for descriptions that relied on the earlier behaviour.
      #
      # @see Dataset#update_frequency
      def update_frequency
        first_value( dataset_uri, dct.accrualPeriodicity ) ||
          first_value( dataset_uri, dcat.accrualPeriodicity )
      end

      # Date the dataset was issued, falling back to its creation date.
      #
      # @return [Date, nil]
      def issued
        parse_date(first_value(dataset_uri, RDF::DC.issued) ||
                   first_value(dataset_uri, RDF::DC.created))
      end

      # Date the dataset was last modified.
      #
      # @return [Date, nil]
      def modified
        parse_date(first_value(dataset_uri, RDF::DC.modified))
      end

      private

      # Parse a date string, passing nil through.
      def parse_date(value)
        value ? Date.parse( value ) : nil
      end

      # The graph parsed from the RDFa at +uri+, loaded once.
      def graph
        @graph ||= RDF::Graph.load(uri, :format => :rdfa)
      end

      # First value recorded for +property+ on +resource+, or +default+.
      def first_value(resource, property, default=nil)
        values = metadata[resource] && metadata[resource][property.to_s]
        values ? values[0] : default
      end

      # Index of the graph as { subject => { predicate => [objects] } }, with
      # every term converted to a String and duplicate objects dropped.
      #
      # Built once per object. If the graph cannot be obtained the error
      # propagates and nothing is cached, so a later call tries again.
      def metadata
        @metadata ||= graph.triples.each_with_object({}) do |(subject, predicate, object), index|
          values = ((index[subject.to_s] ||= {})[predicate.to_s] ||= [])
          value = object.to_s
          values << value unless values.include?(value)
        end
      end

      # URI of the first dcat:Dataset in the graph; looked up once.
      # Raises when the graph contains no dataset (callers rescue this).
      def dataset_uri
        @dataset_uri ||= begin
          query = RDF::Query.new({
            :dataset => {
              RDF.type => dcat.Dataset
            }
          })

          query.execute(graph)[0][:dataset].to_s
        end
      end

      def dcat
        RDF::Vocabulary.new("http://www.w3.org/ns/dcat#")
      end

      def dct
        RDF::Vocabulary.new("http://purl.org/dc/terms/")
      end

      def odrs
        RDF::Vocabulary.new("http://schema.theodi.org/odrs#")
      end

      def void
        RDF::Vocabulary.new("http://rdfs.org/ns/void#")
      end

    end

  end

end
@@ -0,0 +1,80 @@
1
module DataKitten

  # A rights statement for a {Dataset} or {Distribution}
  #
  class Rights

    # Names of every attribute; each one is also accepted as an option key
    # by {#initialize}.
    ATTRIBUTES = [
      :uri,
      :dataLicense, :contentLicense, :copyrightNotice,
      :attributionURL, :attributionText,
      :copyrightHolder, :databaseRightHolder,
      :copyrightYear, :databaseRightYear,
      :copyrightStatement, :databaseRightStatement
    ].freeze

    # @!attribute uri
    #   @return [String] the URI for the rights statement
    attr_accessor :uri

    # @!attribute dataLicense
    #   @return [String] the license for the data in the dataset.
    attr_accessor :dataLicense

    # @!attribute contentLicense
    #   @return [String] the license for the content in the dataset.
    attr_accessor :contentLicense

    # @!attribute copyrightNotice
    #   @return [String] the copyright notice for the dataset.
    attr_accessor :copyrightNotice

    # @!attribute attributionURL
    #   @return [String] the attribution URL for the dataset.
    attr_accessor :attributionURL

    # @!attribute attributionText
    #   @return [String] the attribution text for the dataset.
    attr_accessor :attributionText

    # @!attribute copyrightHolder
    #   @return [String] the URI of the organization that holds copyright for this dataset
    attr_accessor :copyrightHolder

    # @!attribute databaseRightHolder
    #   @return [String] the URI of the organization that owns the database rights for this dataset
    attr_accessor :databaseRightHolder

    # @!attribute copyrightYear
    #   @return [String] the year in which copyright is claimed
    attr_accessor :copyrightYear

    # @!attribute databaseRightYear
    #   @return [String] the year in which the database right is claimed
    attr_accessor :databaseRightYear

    # @!attribute copyrightStatement
    #   @return [String] the URL of a copyright statement for the dataset
    attr_accessor :copyrightStatement

    # @!attribute databaseRightStatement
    #   @return [String] the URL of a database right statement for the dataset
    attr_accessor :databaseRightStatement

    # Create a new Rights object.
    #
    # Options that are not given leave the attribute nil; keys that are not
    # attributes are ignored.
    #
    # @param options [Hash] A set of options with which to initialise the
    #   rights statement. May be omitted or nil.
    # @option options [String] :uri the URI for the rights statement
    # @option options [String] :dataLicense the license for the data in the dataset
    # @option options [String] :contentLicense the license for the content in the dataset
    # @option options [String] :copyrightNotice the copyright notice for the dataset
    # @option options [String] :attributionURL the attribution URL for the dataset
    # @option options [String] :attributionText attribution name for the dataset
    # @option options [String] :copyrightHolder the URI of the copyright holder
    # @option options [String] :databaseRightHolder the URI of the database right holder
    # @option options [String] :copyrightYear the year in which copyright is claimed
    # @option options [String] :databaseRightYear the year in which the database right is claimed
    # @option options [String] :copyrightStatement the URL of a copyright statement
    # @option options [String] :databaseRightStatement the URL of a database right statement
    def initialize(options = {})
      options ||= {}
      ATTRIBUTES.each do |attribute|
        instance_variable_set("@#{attribute}", options[attribute])
      end
    end

  end

end
@@ -0,0 +1,31 @@
1
module DataKitten

  # Where the data has been sourced from
  # Follows the pattern of {http://purl.org/dc/terms/source} with a {http://www.w3.org/2000/01/rdf-schema#label} and a {http://www.w3.org/1999/02/22-rdf-syntax-ns#resource}, and with useful aliases for other vocabularies

  class Source

    # Create a new Source
    #
    # @param [Hash] options the details of the Source. May be omitted or nil,
    #   in which case both attributes start out nil.
    # @option options [String] :label The name of the Source
    # @option options [String] :resource The URI of the Source
    #
    def initialize(options = {})
      options ||= {}
      @label = options[:label]
      @resource = options[:resource]
    end

    # @!attribute label
    #   @return [String] the name of the Source
    attr_accessor :label
    # Reader alias for {#label}.
    alias_method :name, :label

    # @!attribute resource
    #   @return [String] the URI of the Source
    attr_accessor :resource
    # Reader alias for {#resource}.
    alias_method :web, :resource

  end

end