annotations2triannon 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/.env_example +44 -0
  3. data/.travis.yml +15 -0
  4. data/Gemfile +5 -0
  5. data/LICENSE +202 -0
  6. data/README.md +31 -0
  7. data/Rakefile +50 -0
  8. data/annotations2triannon.gemspec +58 -0
  9. data/bin/console +5 -0
  10. data/bin/ctags.rb +8 -0
  11. data/bin/dms.rb +175 -0
  12. data/bin/revs.rb +17 -0
  13. data/bin/revs_annotations2csv.sh +66 -0
  14. data/lib/annotations2triannon/annotation_list.rb +37 -0
  15. data/lib/annotations2triannon/configuration.rb +52 -0
  16. data/lib/annotations2triannon/iiif_annotation_list.rb +17 -0
  17. data/lib/annotations2triannon/iiif_collection.rb +56 -0
  18. data/lib/annotations2triannon/iiif_manifest.rb +32 -0
  19. data/lib/annotations2triannon/iiif_navigator.rb +172 -0
  20. data/lib/annotations2triannon/manifest.rb +86 -0
  21. data/lib/annotations2triannon/open_annotation.rb +262 -0
  22. data/lib/annotations2triannon/open_annotation_harvest.rb +37 -0
  23. data/lib/annotations2triannon/resource.rb +264 -0
  24. data/lib/annotations2triannon/revs.rb +263 -0
  25. data/lib/annotations2triannon/revs_db.rb +69 -0
  26. data/lib/annotations2triannon/shared_canvas_annotation_list.rb +18 -0
  27. data/lib/annotations2triannon/shared_canvas_manifest.rb +32 -0
  28. data/lib/annotations2triannon.rb +27 -0
  29. data/lib/rdf/vocab/Content.rb +112 -0
  30. data/lib/rdf/vocab/sc.rb +233 -0
  31. data/lib/requires.rb +69 -0
  32. data/log/.gitignore +4 -0
  33. data/spec/lib/annotations2triannon/configuration_spec.rb +24 -0
  34. data/spec/lib/annotations2triannon/open_annotation_spec.rb +176 -0
  35. data/spec/lib/annotations2triannon/resource_spec.rb +53 -0
  36. data/spec/lib/annotations2triannon_spec.rb +45 -0
  37. data/spec/spec_helper.rb +10 -0
  38. metadata +387 -0
@@ -0,0 +1,262 @@
1
+ require 'uuid'
2
+
3
module Annotations2triannon

  # An open annotation (http://www.openannotation.org/spec/core/core.html)
  # held in an RDF::Graph, with helpers to insert, query and serialize the
  # annotation's triples.
  #
  # Note: most query helpers match on predicate only (any subject), so they
  # assume the graph holds the triples of a single annotation.
  #
  # class OpenAnnotation < Resource
  class OpenAnnotation

    OA = RDF::Vocab::OA
    OA_CONTEXT = 'http://www.w3.org/ns/oa.jsonld'
    IIIF_CONTEXT = 'http://iiif.io/api/presentation/2/context.json'

    attr_accessor :id
    attr_accessor :graph # an RDF::Graph

    # instantiate this class
    # @param graph [RDF::Graph] for an open annotation
    # @param id [UUID|URI|String] to identify an open annotation
    # @raise [TypeError] when graph is not exactly an RDF::Graph, or when a
    #   non-empty graph contains no subject of RDF.type OA.Annotation
    def initialize(graph=RDF::Graph.new, id=nil)
      @@agent ||= Annotations2triannon::AGENT
      raise TypeError, 'graph must be RDF::Graph instance' unless graph.instance_of? RDF::Graph
      @graph = graph
      if @graph.empty?
        # create a new open annotation
        @id = id_for(id)
        insert_annotation
      else
        # @id is still nil here, so this checks that *any* subject in the
        # graph is typed as an OA.Annotation.
        raise TypeError, 'graph must be an open annotation' unless is_annotation?
        # A supplied id used to be silently dropped for a non-empty graph,
        # leaving @id nil; it is now honoured.
        @id = id_for(id)
      end
    end

    # The annotation identifier; when it is not set yet, it is taken from the
    # first subject of RDF.type OA.Annotation in the graph, or a new UUID.
    # @return [RDF::URI|RDF::Node] the identifier (memoized in @id)
    def get_id
      @id ||= begin
        q = [nil, RDF.type, OA.Annotation]
        @graph.query(q).map {|s| s.subject }.first || RDF::URI.parse(UUID.generate)
      end
    end

    # @return [boolean] true if RDF.type is OA.Annotation, with OA.hasBody and OA.hasTarget
    def open_annotation?
      # TODO: check rules for basic open annotation
      q = RDF::Query.new
      q << [@id, RDF.type, OA.Annotation]
      q << [@id, OA.hasBody, :b]
      q << [@id, OA.hasTarget, :t]
      !@graph.query(q).empty?
    end

    # Assert [id, RDF.type, OA.Annotation] (any existing copy is removed first)
    def insert_annotation
      s = [@id, RDF.type, OA.Annotation]
      @graph.delete(s)
      @graph.insert(s)
    end

    # @return [boolean] true if RDF.type is OA.Annotation
    def is_annotation?
      q = [@id, RDF.type, OA.Annotation]
      !@graph.query(q).empty?
    end

    # Insert [id, OA.hasTarget, target]
    # @param target [RDF::Term] the annotation target
    def insert_hasTarget(target)
      # TODO: raise ValueError when target is outside hasTarget range?
      @graph.insert([@id, OA.hasTarget, target])
    end

    # @return [Array] The hasTarget object(s)
    def hasTarget
      objects_of(OA.hasTarget)
    end

    # @return [boolean] true if the graph has any hasTarget object
    def hasTarget?
      !hasTarget.empty?
    end

    # Insert [id, OA.hasBody, body]
    # @param body [RDF::Term] the annotation body
    def insert_hasBody(body)
      # TODO: raise ValueError when body is outside hasBody range?
      @graph.insert([@id, OA.hasBody, body])
    end

    # @return [Array] The hasBody object(s)
    def hasBody
      objects_of(OA.hasBody)
    end

    # @return [boolean] true if the graph has any hasBody object
    def hasBody?
      !hasBody.empty?
    end

    # @return query solutions for bodies of type ContentAsText
    def body_contentAsText
      body_type RDF::CONTENT.ContentAsText
    end

    # @return [boolean] true if any body is of type ContentAsText
    def body_contentAsText?
      !body_contentAsText.empty?
    end

    # For all bodies that are of type ContentAsText, get the characters as a single String in the returned Array.
    # @return [Array<String>] body chars as Strings, in an Array (one element for each contentAsText body)
    def body_contentChars
      q = RDF::Query.new
      q << [nil, OA.hasBody, :body]
      q << [:body, RDF.type, RDF::CONTENT.ContentAsText]
      q << [:body, RDF::CONTENT.chars, :body_chars]
      @graph.query(q).map {|s| s.body_chars.value }
    end

    # @return query solutions for bodies of type OA.SemanticTag
    def body_semanticTag
      body_type OA.SemanticTag
    end

    # @return [boolean] true if any body is of type OA.SemanticTag
    def body_semanticTag?
      !body_semanticTag.empty?
    end

    # Find the bodies that have an RDF.type of 'uri'
    # @param uri [RDF::URI|String|nil] An RDF.type for a body; nil is passed
    #   to the query as-is (an unconstrained type)
    # @return query solutions, each binding :body
    def body_type(uri=nil)
      uri = RDF::URI.parse(uri) unless uri.nil?
      q = RDF::Query.new
      q << [nil, OA.hasBody, :body]
      q << [:body, RDF.type, uri]
      @graph.query(q)
    end

    # Insert an ?o for [id, OA.motivatedBy, ?o] where ?o is 'motivation'
    # @param motivation [String|URI] An open annotation motivation
    def insert_motivatedBy(motivation)
      # TODO: only accept values allowed by OA.motivationBy range?
      motivation = RDF::URI.parse(motivation)
      @graph.insert([@id, OA.motivatedBy, motivation])
    end

    # Find any matching ?o for ?s OA.motivatedBy ?o where ?o is 'uri'
    # @param uri [RDF::URI|String|nil] Any object of a motivatedBy predicate
    # @return [Array] The motivatedBy object(s)
    def motivatedBy(uri=nil)
      uri = RDF::URI.parse(uri) unless uri.nil?
      objects_of(OA.motivatedBy, uri)
    end

    # Are there any matching ?o for [?s, OA.motivatedBy, ?o] where ?o is 'uri'
    # @param uri [RDF::URI|String|nil] Any object of a motivatedBy predicate
    # @return [boolean] True if the open annotation has any motivatedBy 'uri'
    def motivatedBy?(uri=nil)
      !motivatedBy(uri).empty?
    end

    # Insert [id, OA.motivatedBy, OA.commenting]
    def insert_motivatedByCommenting
      insert_motivatedBy OA.commenting
    end

    # Find all the matching ?s for [?s, OA.motivatedBy, OA.commenting]
    # @return [Array] the matching subject(s)
    def motivatedByCommenting
      subjects_motivated_by(OA.commenting)
    end

    # Are there any matching ?s for [?s, OA.motivatedBy, OA.commenting]
    def motivatedByCommenting?
      !motivatedByCommenting.empty?
    end

    # Insert [id, OA.motivatedBy, OA.tagging]
    def insert_motivatedByTagging
      insert_motivatedBy OA.tagging
    end

    # Find all the matching ?s for [?s, OA.motivatedBy, OA.tagging]
    # @return [Array] the matching subject(s)
    def motivatedByTagging
      subjects_motivated_by(OA.tagging)
    end

    # Are there any matching ?s for [?s, OA.motivatedBy, OA.tagging]
    def motivatedByTagging?
      !motivatedByTagging.empty?
    end

    # Insert [id, OA.annotatedBy, annotator]
    # @param annotator [RDF::Term|nil] the annotator; passed through unchecked
    def insert_annotatedBy(annotator=nil)
      @graph.insert([@id, OA.annotatedBy, annotator])
    end

    # @return [Array] The annotatedBy object(s); empty when there are none
    def annotatedBy
      objects_of(OA.annotatedBy)
    end

    # @param uri [RDF::URI|String|nil] Any object of an annotatedBy predicate
    # @return [boolean] True if the open annotation has any annotatedBy 'uri'
    def annotatedBy?(uri=nil)
      uri = RDF::URI.parse(uri) unless uri.nil?
      !objects_of(OA.annotatedBy, uri).empty?
    end

    # Insert [id, OA.annotatedAt, datetime]
    # @param datetime [RDF::Literal] defaults to the current UTC time
    def insert_annotatedAt(datetime=rdf_now)
      @graph.insert([@id, OA.annotatedAt, datetime])
    end

    # @return [Array] The annotatedAt object(s); empty when there are none
    def annotatedAt
      objects_of(OA.annotatedAt)
    end

    # @return [RDF::Literal] the current UTC time, typed as xsd:dateTime
    def rdf_now
      RDF::Literal.new(Time.now.utc, :datatype => RDF::XSD.dateTime)
    end

    # Assert OA.serializedAt (now) and OA.serializedBy (the software agent)
    # for this annotation; called by every serialization method.
    def provenance
      # http://www.openannotation.org/spec/core/core.html#Provenance
      # When adding the agent, ensure it's not there already, also
      # an open annotation cannot have more than one oa:serializedAt.
      @graph.delete([nil,nil,@@agent])
      @graph.delete([nil, OA.serializedAt, nil])
      @graph << [@id, OA.serializedAt, rdf_now]
      @graph << [@id, OA.serializedBy, @@agent]
    end

    # A json-ld representation of the open annotation
    def as_jsonld
      provenance
      # fromRdf is the spelling Resource#as_jsonld uses.
      JSON::LD::API.fromRdf(@graph)
    end

    # @param context [String] A JSON-LD context URI
    # @return json-ld representation of graph with default context
    def to_jsonld(context=nil)
      provenance
      opts = { standard_prefixes: true }
      opts[:context] = context unless context.nil?
      @graph.dump(:jsonld, **opts)
    end

    # @return json-ld representation of graph with IIIF context
    def to_jsonld_iiif
      to_jsonld IIIF_CONTEXT
    end

    # @return json-ld representation of graph with OpenAnnotation context
    def to_jsonld_oa
      to_jsonld OA_CONTEXT
    end

    # A turtle string representation of the open annotation
    def to_ttl
      provenance
      @graph.dump(:ttl, standard_prefixes: true)
    end

    private

    # Use the given identifier when there is one, otherwise derive one.
    # @param id [UUID|URI|String|nil]
    # @return [RDF::URI|RDF::Node]
    def id_for(id)
      id.nil? ? get_id : RDF::URI.parse(id)
    end

    # The objects of every [?s, predicate, object] statement in the graph.
    # @param predicate [RDF::URI]
    # @param object [RDF::Term|nil] nil matches any object
    # @return [Array]
    def objects_of(predicate, object=nil)
      @graph.query([nil, predicate, object]).map {|s| s.object }
    end

    # The subjects of every [?s, OA.motivatedBy, motivation] statement.
    # @param motivation [RDF::URI]
    # @return [Array]
    def subjects_motivated_by(motivation)
      @graph.query([nil, OA.motivatedBy, motivation]).map {|s| s.subject }
    end

  end

end
262
+
@@ -0,0 +1,37 @@
1
+ require 'rdf'
2
+ require 'rdf-vocab'
3
+
4
# Module designed to be a mixin for manifest and annotation list.
# It extracts, from an RDF graph, one sub-graph per open annotation.
module OpenAnnotationHarvest

  # @param rdf [RDF::Graph] a graph to search for RDF::Vocab::OA.Annotation
  # @return [Array<RDF::Graph>] for graphs of type RDF::Vocab::OA.Annotation
  def collect_open_annotations(rdf)
    oa_graphs = []
    q = [nil, RDF.type, RDF::Vocab::OA.Annotation]
    rdf.query(q).each_subject do |subject|
      g = RDF::Graph.new
      rdf.query([subject, nil, nil]) do |statement|
        g << [statement.subject, statement.predicate, statement.object]
        # Resolve blank nodes against the graph being harvested, not against
        # whatever the including object's #rdf happens to return.
        g << rdf_expand_blank_nodes(statement.object, rdf) if statement.object.node?
      end
      oa_graphs << g
    end
    oa_graphs
  end

  # @param object [RDF::Node] An RDF blank node
  # @param graph [RDF::Graph] the graph to resolve the blank node in; it
  #   defaults to the including object's #rdf, as before this argument existed
  # @return [RDF::Graph] graph of recursive resolution for a blank node
  def rdf_expand_blank_nodes(object, graph = rdf)
    g = RDF::Graph.new
    return g unless object.node?
    # Walk iteratively and remember visited nodes, so blank nodes that refer
    # back to each other cannot cause endless recursion.
    pending = [object]
    seen = {}
    until pending.empty?
      node = pending.shift
      next if seen[node.to_s]
      seen[node.to_s] = true
      graph.query([node, nil, nil]) do |statement|
        g << [statement.subject, statement.predicate, statement.object]
        pending << statement.object if statement.object.node?
      end
    end
    g
  end

end
37
+
@@ -0,0 +1,264 @@
1
+
2
module Annotations2triannon

  # A resource identified by an IRI, whose RDF description is loaded on
  # demand from that IRI; provides RDF query/insert helpers, URL resolution
  # and serialization.
  class Resource

    # How many times #rdf attempts to load the graph before giving up.
    RDF_LOAD_ATTEMPTS = 3

    @@config = nil

    # Issue an HTTP HEAD request (falling back to GET when HEAD fails).
    # @param url [String|URI] A URL to request
    # @return [String|nil] the :url of the response's request args, or nil
    #   when both requests fail (failures are logged)
    def self.http_head_request(url)
      # This class method can run before any instance has set @@config.
      @@config ||= Annotations2triannon.configuration
      [:head, :get].each do |verb|
        begin
          response = RestClient.public_send(verb, url)
          return response.args[:url]
        rescue
          @@config.logger.error "RestClient.#{verb} failed for #{url}"
        end
      end
      nil
    end

    attr_accessor :iri

    # @param uri [String|Addressable::URI] the IRI of the resource; anything
    #   else is accepted only if its string form is an absolute URI
    # @raise [RuntimeError] 'invalid uri' when no Addressable::URI results
    def initialize(uri=nil)
      @@agent ||= Annotations2triannon::AGENT
      @@config ||= Annotations2triannon.configuration
      # Match on the string form: not every object responds to =~.
      if !uri.instance_of?(Addressable::URI) && uri.to_s =~ /\A#{URI::regexp}\z/
        uri = begin
          Addressable::URI.parse(uri.to_s)
        rescue StandardError
          nil
        end
      end
      raise 'invalid uri' unless uri.instance_of? Addressable::URI
      @iri = uri
    end

    # @return [String] the basename of the IRI
    def id
      @iri.basename
    end

    # @param type [RDF::URI|String] an RDF.type
    # @return [boolean] true if the resource has the RDF.type
    def iri_type?(type)
      iri_types.include? RDF::URI.parse(type)
    end

    # @return [Array] the RDF.type objects for the IRI; empty when the RDF
    #   graph could not be loaded
    def iri_types
      graph = rdf
      return [] if graph.nil?
      q = [rdf_uri, RDF.type, :o]
      graph.query(q).map {|s| s.object }
    end

    # Assert PROV.SoftwareAgent and PROV.generatedAtTime
    def provenance
      s = [rdf_uri, RDF::PROV.SoftwareAgent, @@agent]
      rdf.insert(s)
      s = [rdf_uri, RDF::PROV.generatedAtTime, rdf_now]
      rdf.insert(s)
    end

    # This method is often overloaded in subclasses because
    # RDF services use variations in the URL 'extension' patterns; e.g.
    # see Loc#rdf and Viaf#rdf
    # @return [RDF::Graph|nil] the graph loaded from the IRI (memoized), or
    #   nil when every attempt fails (the failure is logged)
    def rdf
      # TODO: try to retrieve the rdf from a local triple store
      # TODO: if local triple store fails, try remote source(s)
      # TODO: if retrieved from a remote source, save the rdf to a local triple store
      return @rdf unless @rdf.nil?
      uri4rdf = @iri.to_s
      tries = 0
      begin
        tries += 1
        @rdf = RDF::Graph.load(uri4rdf)
      rescue
        if tries < RDF_LOAD_ATTEMPTS
          # Back off a little longer after each failure; there is no point
          # in sleeping once the last attempt has failed.
          sleep tries
          retry
        end
        binding.pry if @@config.debug
        @@config.logger.error("Failed to retrieve RDF for #{uri4rdf}")
        @rdf = nil
      end
    end

    # RDF query to find all objects of a predicate
    # @param predicate [RDF::URI] An RDF predicate, the ?p in ?s ?p ?o
    # @return [Array] The objects of predicate, the ?o in ?s ?p ?o
    def query_predicate_objects(predicate)
      q = [:s, predicate, :o]
      rdf.query(q).map {|s| s.object }
    end

    # RDF query to find all subjects with a predicate
    # @param predicate [RDF::URI] An RDF predicate, the ?p in ?s ?p ?o
    # @return [Array] The subjects with predicate, the ?s in ?s ?p ?o
    def query_predicate_subjects(predicate)
      q = [:s, predicate, :o]
      rdf.query(q).map {|s| s.subject }
    end

    # Regexp search to find an object matching a string, if it belongs to @iri
    # @param id [String] A pattern used to construct a case-insensitive Regexp
    # @return [RDF::URI|nil] The first object matching the Regexp
    def rdf_find_object(id)
      return nil unless rdf_valid?
      pattern = Regexp.new(id, Regexp::IGNORECASE)
      iri = @iri.to_s
      rdf.each_statement do |s|
        return s.object if s.subject == iri && s.object.to_s =~ pattern
      end
      nil
    end

    # Regexp search to find a subject matching a string
    # @param id [String] A pattern used to construct a case-insensitive Regexp
    # @return [RDF::URI|nil] The first subject matching the Regexp
    def rdf_find_subject(id)
      return nil unless rdf_valid?
      pattern = Regexp.new(id, Regexp::IGNORECASE)
      rdf.each_subject do |s|
        return s if s.to_s =~ pattern
      end
      nil
    end

    # @param object [RDF::Node] An RDF blank node
    # @return [RDF::Graph] graph of recursive resolution for a blank node
    def rdf_expand_blank_nodes(object)
      g = RDF::Graph.new
      if object.node?
        rdf.query([object, nil, nil]) do |statement|
          g << [statement.subject, statement.predicate, statement.object]
          g << rdf_expand_blank_nodes(statement.object) if statement.object.node?
        end
      end
      g
    end

    # ----
    # RDF::Graph convenience wrappers

    # Insert the statement [uriS, uriP, uriO] into the resource graph.
    def rdf_insert(uriS, uriP, uriO)
      # Go through #rdf (as #provenance does) so the graph is loaded first.
      rdf.insert RDF::Statement(uriS, uriP, uriO)
    end

    def rdf_insert_sameAs(uriS, uriO)
      rdf_insert(uriS, RDF::OWL.sameAs, uriO)
    end

    def rdf_insert_seeAlso(uriS, uriO)
      rdf_insert(uriS, RDF::RDFS.seeAlso, uriO)
    end

    def rdf_insert_creator(uriS, uriO)
      rdf_insert(uriS, RDF::SCHEMA.creator, uriO)
    end

    def rdf_insert_contributor(uriS, uriO)
      rdf_insert(uriS, RDF::SCHEMA.contributor, uriO)
    end

    def rdf_insert_editor(uriS, uriO)
      rdf_insert(uriS, RDF::SCHEMA.editor, uriO)
    end

    def rdf_insert_exampleOfWork(uriS, uriO)
      rdf_insert(uriS, RDF::SCHEMA.exampleOfWork, uriO)
    end

    def rdf_insert_foafFocus(uriS, uriO)
      # http://xmlns.com/foaf/spec/#term_focus
      # relates SKOS:Concept to a 'real world thing'
      rdf_insert(uriS, RDF::FOAF.focus, uriO)
    end

    # Insert a name using FOAF and/or schema.org, as enabled in the config.
    def rdf_insert_name(uriS, name)
      rdf_insert(uriS, RDF::FOAF.name, name) if @@config.use_foaf
      rdf_insert(uriS, RDF::SCHEMA.name, name) if @@config.use_schema
    end

    # @return [RDF::Literal] the current UTC time, typed as xsd:dateTime
    def rdf_now
      RDF::Literal.new(Time.now.utc, :datatype => RDF::XSD.dateTime)
    end

    # @return [RDF::URI] the IRI as an RDF::URI
    def rdf_uri
      RDF::URI.new(@iri)
    end

    # Methods that assert RDF.type

    def rdf_insert_type(uriS, uriO)
      rdf_insert(uriS, RDF.type, uriO)
    end

    def rdf_type_agent(uriS)
      # Note: schema.org has no immediate parent for Person or Organization
      rdf_insert_type(uriS, RDF::FOAF.Agent) if @@config.use_foaf
      rdf_insert_type(uriS, RDF::SCHEMA.Thing) if @@config.use_schema
    end

    def rdf_type_concept(uriS)
      rdf_insert_type(uriS, RDF::SKOS.Concept)
    end

    def rdf_type_organization(uriS)
      rdf_insert_type(uriS, RDF::FOAF.Organization) if @@config.use_foaf
      rdf_insert_type(uriS, RDF::SCHEMA.Organization) if @@config.use_schema
    end

    def rdf_type_person(uriS)
      rdf_insert_type(uriS, RDF::FOAF.Person) if @@config.use_foaf
      rdf_insert_type(uriS, RDF::SCHEMA.Person) if @@config.use_schema
    end

    # @return [boolean] true if the graph asserts any RDF.type for the IRI
    def rdf_valid?
      !iri_types.empty?
    end

    # ---
    # HTTP methods

    # @param url [String|URI] A URL that can be resolved via HTTP request
    # @return [String|nil] The URL reported by Resource.http_head_request,
    #   or nil when it cannot be resolved
    def resolve_url(url)
      # RestClient does all the response code handling and redirection.
      # Keep the requested url so the warning can report what failed.
      resolved = Resource.http_head_request(url)
      if resolved.nil?
        @@config.logger.warn "#{@iri}\t// #{url}"
      else
        @@config.logger.debug "Mapped #{@iri}\t-> #{resolved}"
      end
      resolved
    rescue
      binding.pry if @@config.debug
      @@config.logger.error "unknown http error for #{@iri}"
      nil
    end

    # @return [RDF::Graph] the graph loaded from sameas.org for the IRI (memoized)
    def same_as_org_graph
      return @same_as_org_graph unless @same_as_org_graph.nil?
      # URI.encode was removed in Ruby 3.0; the IRI is a query parameter
      # value, so it is escaped as a form component.
      same_as_url = 'http://sameas.org/rdf?uri=' + URI.encode_www_form_component(@iri.to_s)
      @same_as_org_graph = RDF::Graph.load(same_as_url)
    end

    # @return [Array] the owl:sameAs objects for the IRI in the sameas.org graph
    def same_as_org_query
      # q = SPARQL.parse("SELECT * WHERE { <#{@iri}> <http://www.w3.org/2002/07/owl#sameAs> ?o }")
      q = [rdf_uri, RDF::OWL.sameAs, nil]
      same_as_org_graph.query(q).map {|s| s.object }
    end

    # ---
    # Transforms or Serialization

    # A json-ld object for the rdf resource
    def as_jsonld
      JSON::LD::API.fromRdf(rdf)
    end

    # A json-ld serialization of the rdf resource
    def to_jsonld
      rdf.dump(:jsonld, standard_prefixes: true)
    end

    # A turtle serialization of the rdf resource
    def to_ttl
      rdf.dump(:ttl, standard_prefixes: true)
    end

  end

end
263
+
264
+