annotations2triannon 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/.env_example +44 -0
  3. data/.travis.yml +15 -0
  4. data/Gemfile +5 -0
  5. data/LICENSE +202 -0
  6. data/README.md +31 -0
  7. data/Rakefile +50 -0
  8. data/annotations2triannon.gemspec +58 -0
  9. data/bin/console +5 -0
  10. data/bin/ctags.rb +8 -0
  11. data/bin/dms.rb +175 -0
  12. data/bin/revs.rb +17 -0
  13. data/bin/revs_annotations2csv.sh +66 -0
  14. data/lib/annotations2triannon/annotation_list.rb +37 -0
  15. data/lib/annotations2triannon/configuration.rb +52 -0
  16. data/lib/annotations2triannon/iiif_annotation_list.rb +17 -0
  17. data/lib/annotations2triannon/iiif_collection.rb +56 -0
  18. data/lib/annotations2triannon/iiif_manifest.rb +32 -0
  19. data/lib/annotations2triannon/iiif_navigator.rb +172 -0
  20. data/lib/annotations2triannon/manifest.rb +86 -0
  21. data/lib/annotations2triannon/open_annotation.rb +262 -0
  22. data/lib/annotations2triannon/open_annotation_harvest.rb +37 -0
  23. data/lib/annotations2triannon/resource.rb +264 -0
  24. data/lib/annotations2triannon/revs.rb +263 -0
  25. data/lib/annotations2triannon/revs_db.rb +69 -0
  26. data/lib/annotations2triannon/shared_canvas_annotation_list.rb +18 -0
  27. data/lib/annotations2triannon/shared_canvas_manifest.rb +32 -0
  28. data/lib/annotations2triannon.rb +27 -0
  29. data/lib/rdf/vocab/Content.rb +112 -0
  30. data/lib/rdf/vocab/sc.rb +233 -0
  31. data/lib/requires.rb +69 -0
  32. data/log/.gitignore +4 -0
  33. data/spec/lib/annotations2triannon/configuration_spec.rb +24 -0
  34. data/spec/lib/annotations2triannon/open_annotation_spec.rb +176 -0
  35. data/spec/lib/annotations2triannon/resource_spec.rb +53 -0
  36. data/spec/lib/annotations2triannon_spec.rb +45 -0
  37. data/spec/spec_helper.rb +10 -0
  38. metadata +387 -0
@@ -0,0 +1,66 @@
1
+ #!/bin/bash
2
+ #===============================================================================
3
+ #
4
+ # FILE: revs_annotations2csv.sh
5
+ #
6
+ # USAGE: ./revs_annotations2csv.sh
7
+ #
8
+ # DESCRIPTION:
9
+ #
10
+ # OPTIONS: ---
11
+ # REQUIREMENTS: ---
12
+ # BUGS: ---
13
+ # NOTES: ---
14
+ # AUTHOR: Darren L. Weber, Ph.D. (), darren.weber@stanford.edu
15
+ # COMPANY: Stanford University
16
+ # VERSION: 1.0
17
+ # CREATED: 02/04/2015 02:24:13 PM PST
18
+ # REVISION: ---
19
+ #===============================================================================
20
+
21
# Load the connection settings. .env should export:
#   REVS_SRC_USER - user with login privilege on REVS_SRC_HOST
#   REVS_SRC_HOST - host that has access to the REVS mysql db
source .env
if [ $? -ne 0 ]; then echo 'failed to source .env'; exit 1; fi
: "${REVS_SRC_USER:?REVS_SRC_USER must be exported by .env}"
: "${REVS_SRC_HOST:?REVS_SRC_HOST must be exported by .env}"

# There should be a script in $REVS_SRC_USER@$REVS_SRC_HOST:~/revs_annotations_dump.sh
# That script should dump mysql tables into $REVS_SRC_USER@$REVS_SRC_HOST:~/revs_annotations.sql
# These ssh/scp connections may rely on Kerberos authentication.
remote="${REVS_SRC_USER}@${REVS_SRC_HOST}"

# Stop early when the remote dump fails; otherwise scp could silently fetch a
# stale revs_annotations.sql left over from a previous run.
ssh "${remote}" '~/revs_annotations_dump.sh'
if [ $? -ne 0 ]; then echo 'failed to run revs_annotations_dump.sh on the remote host'; exit 1; fi

scp "${remote}:~/revs_annotations.sql" .
if [ $? -ne 0 ]; then echo 'failed to copy revs_annotations.sql from the remote host'; exit 1; fi

sleep 1
if [ ! -s revs_annotations.sql ]; then
    echo 'failed to dump and retrieve revs_annotations.sql'
    exit 1
fi

# assumes the 'revs' db has been created and the current
# shell $USER has required privileges to run SQL in revs_annotations.sql,
# such as DROP/CREATE/LOCK table, INSERT etc.  Also note that the
# $USER must have a MySQL password stored in ~/.mysql_pass
if [ ! -r ~/.mysql_pass ]; then echo 'failed to read ~/.mysql_pass'; exit 1; fi
pass4mysql=$(< ~/.mysql_pass)

# NOTE(review): a password given on the command line can be visible in the
# process list; consider a --defaults-extra-file option file instead.
# The expansions are quoted so a password containing spaces or glob
# characters is passed through intact.
mysql --user="${USER}" --password="${pass4mysql}" revs < revs_annotations.sql
if [ $? -ne 0 ]; then echo 'failed to import revs_annotations.sql'; exit 1; fi
echo "Imported revs annotations into local MySQL db."

out_path='/tmp/revs'

mkdir -p "${out_path}"
if [ $? -ne 0 ]; then echo "failed to create ${out_path}"; exit 1; fi

# World-writable so the MySQL server process can write the --tab output files.
chmod a+rwx "${out_path}"
if [ $? -ne 0 ]; then echo "failed to chmod ${out_path}"; exit 1; fi

# Assumes the $USER has FILE permission, i.e.
# GRANT FILE ON *.* TO 'USER'@'localhost';
mysqldump --tab="${out_path}" \
    --fields-enclosed-by="'" \
    --fields-terminated-by=';' \
    --fields-escaped-by='"' \
    --lines-terminated-by='\n' \
    --user="${USER}" --password="${pass4mysql}" revs annotations users

if [ $? -ne 0 ]; then echo 'failed to export revs to CSV'; exit 1; fi
echo "Exported revs annotations to CSV files at ${out_path}: "
ls -lh "${out_path}/"
66
+
@@ -0,0 +1,37 @@
1
+
2
module Annotations2triannon

  # An annotation list resource that may be typed in either the Shared Canvas
  # or the IIIF Presentation namespace.
  #
  # NOTE(review): `rdf`, `iri_type?` and `@@config` are not defined in this
  # file; presumably they are inherited from Resource, and
  # `collect_open_annotations` comes from OpenAnnotationHarvest -- confirm.
  class AnnotationList < Resource

    include OpenAnnotationHarvest

    # @return [Boolean] true when the resource is typed as an SC or an IIIF
    #   annotation list
    def annotation_list?
      sc_annotation_list? || iiif_annotation_list?
    end

    # @return [Boolean] true when typed as RDF::IIIFPresentation.AnnotationList
    def iiif_annotation_list?
      iri_type? RDF::IIIFPresentation.AnnotationList
    end

    # @return [Boolean] true when typed as RDF::SC.AnnotationList
    def sc_annotation_list?
      iri_type? RDF::SC.AnnotationList
    end

    # Open annotations harvested from this list's RDF, wrapped as
    # Annotations2triannon::OpenAnnotation objects.
    #
    # A successful harvest is memoized in @open_annotations, so any random
    # sampling (limit_openannos > 0) is performed once and repeated calls
    # return the same objects.  A failure is logged and yields an empty array
    # without being cached, so a later call can try again.
    #
    # @return [Array<Annotations2triannon::OpenAnnotation>]
    def open_annotations
      return @open_annotations unless @open_annotations.nil?
      oa_graphs = collect_open_annotations(rdf)
      limit = @@config.limit_openannos
      oa_graphs = oa_graphs.sample(limit) if limit > 0
      @open_annotations = oa_graphs.collect do |oa|
        Annotations2triannon::OpenAnnotation.new(oa)
      end
    rescue => e
      binding.pry if @@config.debug
      @@config.logger.error(e.message)
      [] # callers iterate over the result, so never hand back the logger's return value
    end

  end

end
37
+
@@ -0,0 +1,52 @@
1
+
2
require 'fileutils'

module Annotations2triannon

  # Runtime settings read from environment variables, plus the logger that is
  # built from them.
  #
  # Environment variables:
  #   DEBUG                - 'true' (any letter case) enables debug mode
  #   ANNO_LIMIT_MANIFESTS - sample size for manifests, 0 disables sampling
  #   ANNO_LIMIT_ANNOLISTS - sample size for annotation lists, 0 disables sampling
  #   ANNO_LIMIT_OPENANNOS - sample size for open annotations, 0 disables sampling
  #   ANNO_LOG_FILE        - log file path, default 'annotations2triannon.log'
  class Configuration

    attr_reader :log_file
    attr_reader :log_path
    attr_reader :logger

    attr_accessor :debug
    attr_accessor :limit_manifests
    attr_accessor :limit_annolists
    attr_accessor :limit_openannos

    def initialize
      @debug = env_boolean('DEBUG')

      # In development, enable options for random sampling the data.
      # A missing or non-numeric value becomes 0, which disables sampling.
      @limit_manifests = ENV['ANNO_LIMIT_MANIFESTS'].to_i
      @limit_annolists = ENV['ANNO_LIMIT_ANNOLISTS'].to_i
      @limit_openannos = ENV['ANNO_LIMIT_OPENANNOS'].to_i

      # logger
      @log_file = File.absolute_path(ENV['ANNO_LOG_FILE'] || 'annotations2triannon.log')
      @log_path = File.dirname(@log_file)
      log_dev = open_log_device
      log_dev.sync = true if @debug # skip IO buffering in debug mode
      @logger = Logger.new(log_dev, 'monthly')
      @logger.level = @debug ? Logger::DEBUG : Logger::INFO
    end

    # Check whether an ENV variable is set to 'true' (case-insensitive).
    # @param var [String] name of the environment variable
    # @return [Boolean] false when the variable is unset or has any other value
    def env_boolean(var)
      # nil.to_s is '', so an unset variable needs no rescue
      ENV[var].to_s.upcase == 'TRUE'
    end

    private

    # Open the log file for writing, creating any missing parent directories
    # (all levels, not only the last one).  When the directory or the file
    # cannot be created, fall back to $stderr and record 'STDERR' as the
    # log file name.
    # @return [IO] the device handed to Logger
    def open_log_device
      FileUtils.mkdir_p(@log_path) unless File.directory?(@log_path)
      File.new(@log_file, 'w+')
    rescue StandardError
      @log_file = 'STDERR'
      $stderr
    end

  end

end
52
+
@@ -0,0 +1,17 @@
1
+
2
module Annotations2triannon

  # An AnnotationList restricted to the IIIF Presentation namespace: the
  # Shared Canvas type check always fails, so only an IIIF-typed resource
  # counts as an annotation list.
  class IIIFAnnotationList < AnnotationList

    # Shared Canvas content is filtered out unconditionally.
    # @return [Boolean] always false
    def sc_annotation_list?
      false
    end

    # @return [Boolean] the result of the inherited IIIF type check
    def annotation_list?
      iiif_annotation_list?
    end

  end

end
@@ -0,0 +1,56 @@
1
+
2
module Annotations2triannon

  # A IIIF Presentation collection and the manifests declared in its RDF.
  #
  # NOTE(review): `rdf`, `iri_type?` and `@@config` are not defined in this
  # file; presumably they are inherited from Resource -- confirm.
  class IIIFCollection < Resource

    # @return [Boolean] true when typed as RDF::IIIFPresentation.Collection
    def collection?
      iri_type? RDF::IIIFPresentation.Collection
    end

    # Generic manifests, covering both the IIIF and the SC typed subjects.
    # The result is memoized.
    # @return [Array<Annotations2triannon::Manifest>]
    def manifests
      return @manifests unless @manifests.nil?
      uris = manifest_uris(query_iiif_manifests) + manifest_uris(query_sc_manifests)
      @manifests = uris.collect { |uri| Annotations2triannon::Manifest.new(uri) }
    end

    # Manifests typed as RDF::SC.Manifest.  The result is memoized.
    # @return [Array<Annotations2triannon::SharedCanvasManifest>]
    def sc_manifests
      return @sc_manifests unless @sc_manifests.nil?
      # manifest_uris already yields the statement subjects, so each element
      # is passed on as-is (it has no #subject of its own to call).
      @sc_manifests = manifest_uris(query_sc_manifests).collect do |uri|
        Annotations2triannon::SharedCanvasManifest.new(uri)
      end
    end

    # Manifests typed as RDF::IIIFPresentation.Manifest.  The result is memoized.
    # @return [Array<Annotations2triannon::IIIFManifest>]
    def iiif_manifests
      return @iiif_manifests unless @iiif_manifests.nil?
      # See sc_manifests: the elements are already the manifest subjects.
      @iiif_manifests = manifest_uris(query_iiif_manifests).collect do |uri|
        Annotations2triannon::IIIFManifest.new(uri)
      end
    end


    private

    # Subjects of the statements matching the query pattern, randomly sampled
    # down to limit_manifests when that limit is greater than zero.
    # @param q [Array] a [subject, predicate, object] query pattern
    # @return [Array] the matching statement subjects
    def manifest_uris(q)
      uris = rdf.query(q).collect { |s| s.subject }
      limit = @@config.limit_manifests
      uris = uris.sample(limit) if limit > 0
      uris
    end

    # @return [Array] a query triple to find RDF::IIIFPresentation.Manifest
    def query_iiif_manifests
      [nil, RDF.type, RDF::IIIFPresentation.Manifest]
    end

    # @return [Array] a query triple to find RDF::SC.Manifest
    def query_sc_manifests
      [nil, RDF.type, RDF::SC.Manifest]
    end

  end

end
56
+
@@ -0,0 +1,32 @@
1
+
2
module Annotations2triannon

  # A Manifest restricted to the IIIF Presentation namespace: Shared Canvas
  # checks and Shared Canvas annotation lists are always empty-handed.
  class IIIFManifest < Manifest

    # Shared Canvas content is filtered out unconditionally.
    # @return [Boolean] always false
    def sc_manifest?
      false
    end

    # @return [Boolean] the result of the inherited IIIF type check
    def manifest?
      iiif_manifest?
    end

    # Annotation lists found with the IIIF annotation list query only.
    # The result is memoized.
    #
    # NOTE(review): the lists are built as generic AnnotationList objects,
    # while Manifest#iiif_annotation_lists builds IIIFAnnotationList objects;
    # confirm that the generic class is intended here.
    #
    # @return [Array<Annotations2triannon::AnnotationList>]
    def annotation_lists
      @annotation_lists ||= begin
        list_uris = collect_annotation_list_uris(query_iiif_annotation_list)
        list_uris.map { |list_uri| Annotations2triannon::AnnotationList.new(list_uri) }
      end
    end

    # There are never any Shared Canvas annotation lists for this class.
    # @return [Array] an empty (memoized) array
    def sc_annotation_lists
      @sc_annotation_lists ||= []
    end

  end

end
32
+
@@ -0,0 +1,172 @@
1
+
2
module Annotations2triannon

  # Navigates from a collection URI down to its manifests, their annotation
  # lists, and the open annotations in those lists.  Every level is resolved
  # lazily on first access and then cached in an instance variable.
  class IIIFNavigator

    @@config = nil

    # Only writers are generated here; every reader is defined explicitly
    # below with lazy loading (generating readers too would just be
    # overwritten and trigger method-redefinition warnings).
    attr_writer :collection
    attr_writer :manifests
    attr_writer :annotation_lists
    attr_writer :open_annotations

    attr_writer :iiif_collection
    attr_writer :iiif_manifests
    attr_writer :iiif_annotation_lists
    attr_writer :iiif_open_annotations

    attr_writer :sc_manifests
    attr_writer :sc_annotation_lists
    attr_writer :sc_open_annotations


    # @param collection_uri [URI|String] an HTTP URI for a collection
    def initialize(collection_uri)
      @@config ||= Annotations2triannon.configuration
      @uri = RDF::URI.parse(collection_uri)
      @collection = nil
      @manifests = nil
      @annotation_lists = nil
      @open_annotations = nil
      @iiif_collection = nil
      @iiif_manifests = nil
      @iiif_annotation_lists = nil
      @iiif_open_annotations = nil
      @sc_manifests = nil
      @sc_annotation_lists = nil
      @sc_open_annotations = nil
    end


    # ----
    # Collection utilities

    # @return collection - a IIIF Presentation collection
    def collection
      # There may be no distinction between IIIF and SC at the collection level
      iiif_collection
    end

    # @return iiif_collection - a IIIF Presentation collection
    def iiif_collection
      @iiif_collection ||= Annotations2triannon::IIIFCollection.new(@uri)
    end

    # There is no RDF::SC.Collection because SC uses alternate
    # vocabularies for this level of discovery.
    # def sc_collection
    #   @sc_collection ||= Annotations2triannon::SCCollection.new(@uri)
    # end


    # ----
    # Manifest utilities

    # @return [boolean] are there any manifests in the collection?
    def manifests?
      ! manifests.empty?
    end

    # @return manifests [Array] generic manifests, either IIIF or SC manifests
    def manifests
      @manifests ||= collection.manifests
    end

    # IIIF manifests (excluding SC manifests)
    # The RDF type of these manifests is declared in the parent collection.
    # But, watch out, the manifest itself may declare a different RDF type!

    # @return [boolean] are there any IIIF manifests in the collection?
    def iiif_manifests?
      ! iiif_manifests.empty?
    end

    # @return iiif_manifests [Array] IIIF presentation manifests
    def iiif_manifests
      @iiif_manifests ||= collection.iiif_manifests
    end

    # SC manifests (excluding IIIF manifests)
    # The RDF type of these manifests is declared in the parent collection.
    # But, watch out, the manifest itself may declare a different RDF type!

    # @return [boolean] are there any Shared Canvas manifests in the collection?
    def sc_manifests?
      ! sc_manifests.empty?
    end

    # @return sc_manifests [Array] Shared Canvas manifests
    def sc_manifests
      @sc_manifests ||= collection.sc_manifests
    end


    # ----
    # Annotation List utilities
    # Each result is a Hash of manifest URI string => Array of annotation lists.

    # @return annotation_lists [Hash] generic annotation lists, by manifest URI
    def annotation_lists
      @annotation_lists ||= collect_annotation_lists(manifests)
    end

    # @return iiif_annotation_lists [Hash] IIIF Presentation annotation lists, by manifest URI
    def iiif_annotation_lists
      @iiif_annotation_lists ||= collect_annotation_lists(iiif_manifests)
    end

    # @return sc_annotation_lists [Hash] Shared Canvas annotation lists, by manifest URI
    def sc_annotation_lists
      @sc_annotation_lists ||= collect_annotation_lists(sc_manifests)
    end


    # ----
    # Open Annotation utilities
    # Note that these open annotations are from annotation lists, not
    # directly from manifests.
    # Each result is a Hash of manifest URI string => Hash of
    # annotation list URI string => open annotations of that list.

    # @return open_annotations [Hash] Open Annotations from Annotation Lists
    def open_annotations
      @open_annotations ||= collect_open_annotations(annotation_lists)
    end

    # @return iiif_open_annotations [Hash] Open Annotations from IIIF Annotation Lists
    def iiif_open_annotations
      @iiif_open_annotations ||= collect_open_annotations(iiif_annotation_lists)
    end

    # @return sc_open_annotations [Hash] Open Annotations from Shared Canvas Annotation Lists
    def sc_open_annotations
      @sc_open_annotations ||= collect_open_annotations(sc_annotation_lists)
    end


    # def oa2triannon
    #
    #   # TODO: - post the open_annotation to triannon-app
    #   # TODO: - check the http status on the post
    #   # TODO: - log.debug on success; log.error on errors
    #
    # end

    private

    # Index the annotation lists of each manifest by the manifest IRI.
    # @param manifests [Array] objects responding to #iri and #annotation_lists
    # @return [Hash] manifest IRI string => that manifest's annotation lists
    def collect_annotation_lists(manifests)
      manifests.each_with_object({}) do |manifest, anno_lists|
        anno_lists[manifest.iri.to_s] = manifest.annotation_lists
      end
    end

    # Index the open annotations of every annotation list, grouped by manifest.
    # @param annotation_lists [Hash] manifest URI => Array of objects
    #   responding to #iri and #open_annotations
    # @return [Hash] manifest URI => { list IRI string => open annotations }
    def collect_open_annotations(annotation_lists)
      annotation_lists.each_with_object({}) do |(manifest_uri, lists), oa|
        oa[manifest_uri] ||= {}
        lists.each do |list|
          oa[manifest_uri][list.iri.to_s] = list.open_annotations
        end
      end
    end

  end

end
172
+
@@ -0,0 +1,86 @@
1
+
2
module Annotations2triannon

  # A manifest resource that may be typed in either the IIIF Presentation or
  # the Shared Canvas namespace, with access to its annotation lists and
  # open annotations.
  #
  # NOTE(review): `rdf`, `iri_type?` and `@@config` are not defined in this
  # file; presumably they are inherited from Resource, and
  # `collect_open_annotations` comes from OpenAnnotationHarvest -- confirm.
  class Manifest < Resource

    include OpenAnnotationHarvest

    # @return [Boolean] true when the resource is typed as an IIIF or an SC manifest
    def manifest?
      iiif_manifest? || sc_manifest?
    end

    # @return [Boolean] true when typed as RDF::IIIFPresentation.Manifest
    def iiif_manifest?
      iri_type? RDF::IIIFPresentation.Manifest
    end

    # @return [Boolean] true when typed as RDF::SC.Manifest
    def sc_manifest?
      iri_type? RDF::SC.Manifest
    end

    # Generic annotation lists, found with both the IIIF and the SC query.
    # The result is memoized.
    # @return [Array<Annotations2triannon::AnnotationList>]
    def annotation_lists
      return @annotation_lists unless @annotation_lists.nil?
      uris = []
      uris.push(* collect_annotation_list_uris(query_iiif_annotation_list))
      uris.push(* collect_annotation_list_uris(query_sc_annotation_list))
      @annotation_lists = uris.collect do |uri|
        Annotations2triannon::AnnotationList.new(uri)
      end
    end

    # Annotation lists found with the IIIF query only.  The result is memoized.
    # @return [Array<Annotations2triannon::IIIFAnnotationList>]
    def iiif_annotation_lists
      return @iiif_annotation_lists unless @iiif_annotation_lists.nil?
      uris = collect_annotation_list_uris(query_iiif_annotation_list)
      @iiif_annotation_lists = uris.collect do |uri|
        Annotations2triannon::IIIFAnnotationList.new(uri)
      end
    end

    # Annotation lists found with the SC query only.  The result is memoized.
    # @return [Array<Annotations2triannon::SharedCanvasAnnotationList>]
    def sc_annotation_lists
      return @sc_annotation_lists unless @sc_annotation_lists.nil?
      uris = collect_annotation_list_uris(query_sc_annotation_list)
      @sc_annotation_lists = uris.collect do |uri|
        Annotations2triannon::SharedCanvasAnnotationList.new(uri)
      end
    end

    # Open annotations harvested directly from this manifest's RDF.  Unlike
    # AnnotationList#open_annotations, the harvested items are returned as
    # they come from collect_open_annotations, without wrapping.
    #
    # A successful harvest is memoized in @open_annotations, so any random
    # sampling (limit_openannos > 0) is performed once and repeated calls
    # return the same items.  A failure is logged and yields an empty array
    # without being cached, so a later call can try again.
    #
    # @return [Array] the harvested open annotations
    def open_annotations
      return @open_annotations unless @open_annotations.nil?
      oa_graphs = collect_open_annotations(rdf)
      limit = @@config.limit_openannos
      oa_graphs = oa_graphs.sample(limit) if limit > 0
      @open_annotations = oa_graphs
    rescue => e
      binding.pry if @@config.debug
      @@config.logger.error(e.message)
      [] # callers iterate over the result, so never hand back the logger's return value
    end


    protected

    # @return a query triple to find RDF::SC.AnnotationList
    def query_sc_annotation_list
      [nil, RDF.type, RDF::SC.AnnotationList]
    end

    # @return a query triple to find RDF::IIIFPresentation.AnnotationList
    def query_iiif_annotation_list
      [nil, RDF.type, RDF::IIIFPresentation.AnnotationList]
    end

    # Subjects of the statements matching the query pattern, randomly sampled
    # down to limit_annolists when that limit is greater than zero.
    # @param q [Array] a [subject, predicate, object] query pattern
    # @return [Array] the matching statement subjects
    def collect_annotation_list_uris(q)
      uris = rdf.query(q).collect { |s| s.subject }
      limit = @@config.limit_annolists
      uris = uris.sample(limit) if limit > 0
      uris
    end

  end

end
86
+