ld4l-works_rdf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +25 -0
  4. data/.travis.yml +14 -0
  5. data/CHANGES.md +3 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +170 -0
  9. data/Rakefile +2 -0
  10. data/ld4l-works_rdf.gemspec +42 -0
  11. data/lib/ld4l/works_rdf.rb +131 -0
  12. data/lib/ld4l/works_rdf/configuration.rb +41 -0
  13. data/lib/ld4l/works_rdf/models/bibframe/bibframe_identifier.rb +17 -0
  14. data/lib/ld4l/works_rdf/models/bibframe/bibframe_instance.rb +23 -0
  15. data/lib/ld4l/works_rdf/models/bibframe/bibframe_organization.rb +16 -0
  16. data/lib/ld4l/works_rdf/models/bibframe/bibframe_person.rb +16 -0
  17. data/lib/ld4l/works_rdf/models/bibframe/bibframe_place.rb +16 -0
  18. data/lib/ld4l/works_rdf/models/bibframe/bibframe_provider.rb +18 -0
  19. data/lib/ld4l/works_rdf/models/bibframe/bibframe_title.rb +18 -0
  20. data/lib/ld4l/works_rdf/models/bibframe/bibframe_work.rb +16 -0
  21. data/lib/ld4l/works_rdf/models/bibo/bibo_book.rb +19 -0
  22. data/lib/ld4l/works_rdf/models/bibo/bibo_document.rb +14 -0
  23. data/lib/ld4l/works_rdf/models/bibo/vivo_authorship.rb +15 -0
  24. data/lib/ld4l/works_rdf/models/bibo/vivo_book.rb +18 -0
  25. data/lib/ld4l/works_rdf/models/generic_work.rb +13 -0
  26. data/lib/ld4l/works_rdf/models/schema/oclc_schema_book.rb +16 -0
  27. data/lib/ld4l/works_rdf/models/schema/schema_book.rb +23 -0
  28. data/lib/ld4l/works_rdf/models/schema/schema_person.rb +17 -0
  29. data/lib/ld4l/works_rdf/models/schema/schema_publisher.rb +15 -0
  30. data/lib/ld4l/works_rdf/models/work_metadata.rb +212 -0
  31. data/lib/ld4l/works_rdf/services/attempt_generic_metadata_extraction.rb +41 -0
  32. data/lib/ld4l/works_rdf/services/conversion_services/get_rdfxml_from_marcxml.rb +44 -0
  33. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.ErrorCodes.xqy +56 -0
  34. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXML-2-MADSRDF.xqy +1702 -0
  35. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXML-2-RecordInfoRDF.xqy +216 -0
  36. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXMLBIB-2-BIBFRAME.xqy +140 -0
  37. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXMLBIB-BFUtils.xqy +3287 -0
  38. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-2-BIBFRAME-Shared.xqy +4112 -0
  39. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-Default-2-BF.xqy +61 -0
  40. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-NotatedMusic-2-BF.xqy +105 -0
  41. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-ExhibitJSON.xqy +119 -0
  42. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-JSON.xqy +193 -0
  43. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-Ntriples.xqy +276 -0
  44. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXMLnested-2-flat.xqy +380 -0
  45. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/ml.xqy +239 -0
  46. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/saxon.xqy +134 -0
  47. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba.xqy +359 -0
  48. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba2-0.xqy +249 -0
  49. data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba3-0.xqy +362 -0
  50. data/lib/ld4l/works_rdf/services/conversion_services/saxon/saxon9he.jar +0 -0
  51. data/lib/ld4l/works_rdf/services/get_metadata_from_marcxml_uri.rb +43 -0
  52. data/lib/ld4l/works_rdf/services/get_metadata_from_oclc_uri.rb +25 -0
  53. data/lib/ld4l/works_rdf/services/get_metadata_from_solr_query.rb +29 -0
  54. data/lib/ld4l/works_rdf/services/get_metadata_from_vivo_uri.rb +25 -0
  55. data/lib/ld4l/works_rdf/services/get_model_from_uri.rb +62 -0
  56. data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_bibframe_models.rb +60 -0
  57. data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_bibo_model.rb +42 -0
  58. data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_generic_model.rb +41 -0
  59. data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_oclc_model.rb +42 -0
  60. data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_solr_doc.rb +67 -0
  61. data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_vivo_model.rb +45 -0
  62. data/lib/ld4l/works_rdf/services/metadata_services/set_error_in_metadata.rb +27 -0
  63. data/lib/ld4l/works_rdf/services/model_services/populate_bibframe_models_from_repository.rb +46 -0
  64. data/lib/ld4l/works_rdf/services/model_services/populate_generic_model_from_repository.rb +30 -0
  65. data/lib/ld4l/works_rdf/services/model_services/populate_oclc_model_from_repository.rb +27 -0
  66. data/lib/ld4l/works_rdf/services/model_services/populate_vivo_model_from_repository.rb +27 -0
  67. data/lib/ld4l/works_rdf/services/negotiation_services/get_marcxml_from_uri.rb +35 -0
  68. data/lib/ld4l/works_rdf/services/negotiation_services/get_rdfxml_from_uri.rb +37 -0
  69. data/lib/ld4l/works_rdf/services/negotiation_services/get_solr_results_from_solr_query.rb +35 -0
  70. data/lib/ld4l/works_rdf/services/negotiation_services/get_turtle_from_uri.rb +37 -0
  71. data/lib/ld4l/works_rdf/services/negotiation_services/response_header.rb +51 -0
  72. data/lib/ld4l/works_rdf/services/repository_services/populate_graph_from_rdfxml.rb +22 -0
  73. data/lib/ld4l/works_rdf/services/repository_services/populate_graph_from_turtle.rb +21 -0
  74. data/lib/ld4l/works_rdf/services/repository_services/populate_repository_from_graph.rb +25 -0
  75. data/lib/ld4l/works_rdf/version.rb +5 -0
  76. data/lib/ld4l/works_rdf/vocab/bf.rb +29 -0
  77. data/lib/ld4l/works_rdf/vocab/bgn.rb +5 -0
  78. data/lib/ld4l/works_rdf/vocab/bibo.rb +10 -0
  79. data/lib/ld4l/works_rdf/vocab/library.rb +6 -0
  80. data/lib/ld4l/works_rdf/vocab/vitro.rb +5 -0
  81. data/lib/ld4l/works_rdf/vocab/vivo.rb +11 -0
  82. data/spec/ld4l/works_rdf/configuration_spec.rb +166 -0
  83. data/spec/ld4l/works_rdf/models/books/vivo_book_rdf_spec.rb +267 -0
  84. data/spec/ld4l/works_rdf/services/get_metadata_from_uri_spec.rb +39 -0
  85. data/spec/ld4l/works_rdf/services/get_model_from_uri_spec.rb +34 -0
  86. data/spec/ld4l/works_rdf_spec.rb +53 -0
  87. data/spec/spec_helper.rb +26 -0
  88. metadata +321 -0
@@ -0,0 +1,239 @@
1
+ xquery version "1.0-ml";
2
+
3
+ (:
4
+ : Module Name: MARC/XML BIB 2 BIBFRAME RDF using MarkLogic
5
+ :
6
+ : Module Version: 1.0
7
+ :
8
+ : Date: 2012 December 03
9
+ :
10
+ : Copyright: Public Domain
11
+ :
12
+ : Proprietary XQuery Extensions Used: xdmp (MarkLogic)
13
+ :
14
+ : Xquery Specification: January 2007
15
+ :
16
+ : Module Overview: Transforms MARC/XML Bibliographic records
17
+ : to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
18
+ : N-triples, or JSON.
19
+ :
20
+ :)
21
+
22
+ (:~
23
+ : Transforms MARC/XML Bibliographic records
24
+ : to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
25
+ : N-triples, or JSON.
26
+ :
27
+ : adding holdings capability; allow <marcxml:collection> with multiple records, some holdings, related to bibs on 004
28
+
29
+ : @author Kevin Ford (kefo@loc.gov)
30
+
31
+ : @since December 17, 2014
32
+ : @version 1.0
33
+ :)
34
+
35
+ (: IMPORTED MODULES :)
36
+ import module namespace marcbib2bibframe = "info:lc/id-modules/marcbib2bibframe#" at "../modules/module.MARCXMLBIB-2-BIBFRAME.xqy";
37
+ import module namespace rdfxml2nt = "info:lc/id-modules/rdfxml2nt#" at "../modules/module.RDFXML-2-Ntriples.xqy";
38
+ import module namespace rdfxml2json = "info:lc/id-modules/rdfxml2json#" at "../modules/module.RDFXML-2-JSON.xqy";
39
+ import module namespace bfRDFXML2exhibitJSON = "info:lc/bf-modules/bfRDFXML2exhibitJSON#" at "../modules/module.RDFXML-2-ExhibitJSON.xqy";
40
+ import module namespace RDFXMLnested2flat = "info:lc/bf-modules/RDFXMLnested2flat#" at "../modules/module.RDFXMLnested-2-flat.xqy";
41
+
42
+ (: NAMESPACES :)
43
+ declare namespace xdmp = "http://marklogic.com/xdmp";
44
+
45
+ declare namespace marcxml = "http://www.loc.gov/MARC21/slim";
46
+ declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
47
+ declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";
48
+
49
+ declare namespace bf = "http://bibframe.org/vocab/";
50
+ declare namespace madsrdf = "http://www.loc.gov/mads/rdf/v1#";
51
+ declare namespace relators = "http://id.loc.gov/vocabulary/relators/";
52
+ declare namespace identifiers = "http://id.loc.gov/vocabulary/identifiers/";
53
+ declare namespace notes = "http://id.loc.gov/vocabulary/notes/";
54
+
55
+ declare namespace log = "info:lc/marc2bibframe/logging#";
56
+ declare namespace mlerror = "http://marklogic.com/xdmp/error";
57
+
58
+ declare option xdmp:output "indent-untyped=yes" ;
59
+
60
+ (:~
61
+ : This variable is for the base uri for your Authorities/Concepts.
62
+ : It is the base URI for the rdf:about attribute.
63
+ :
64
+ :)
65
+ declare variable $baseuri as xs:string := xdmp:get-request-field("baseuri","http://example.org/");
66
+
67
+ (:~
68
+ : This variable determines whether bnodes should identify resources instead of
69
+ : http URIs, except the "main" Work derived from each MARC record. At this time,
70
+ : the "main" work must be referenced by HTTP URI (using the $baseuri variable
71
+ : above).
72
+ :
73
+ :)
74
+ declare variable $usebnodes as xs:string := xdmp:get-request-field("usebnodes","false");
75
+
76
+ (:~
77
+ : This variable is for the MARCXML location - externally defined.
78
+ :)
79
+ declare variable $marcxmluri as xs:string := xdmp:get-request-field("marcxmluri","");
80
+
81
+ (:~
82
+ : This variable is for desired serialization. Expected values are: rdfxml (default), rdfxml-raw, ntriples, json, exhibitJSON, log
83
+ :)
84
+ declare variable $serialization as xs:string := xdmp:get-request-field("serialization","rdfxml");
85
+
86
+ (:~
87
+ : If set to "true" will write log file to directory.
88
+ :)
89
+ declare variable $writelog as xs:string := xdmp:get-request-field("writelog","false");
90
+
91
+ (:~
92
+ : Directory for log files. MUST end with a slash.
93
+ :)
94
+ declare variable $logdir as xs:string := xdmp:get-request-field("logdir","");
95
+
96
+
97
+
98
+ let $startDT := fn:current-dateTime()
99
+ let $logfilename := fn:replace(fn:substring-before(xs:string($startDT), "."), "-|:", "")
100
+ let $logfilename := fn:concat($logdir, $logfilename, '.log.xml')
101
+
102
+ let $marcxml :=
103
+ xdmp:document-get(
104
+ $marcxmluri,
105
+ <options xmlns="xdmp:document-get">
106
+ <format>xml</format>
107
+ </options>
108
+ )
109
+
110
+ let $marcxml := $marcxml//marcxml:record
111
+
112
+ let $result :=
113
+ for $r in $marcxml[@type="Bibliographic" or fn:not(@type)]
114
+ let $controlnum := xs:string($r/marcxml:controlfield[@tag eq "001"][1])
115
+ let $holds:=
116
+ for $hold in $marcxml[fn:string(marcxml:controlfield[@tag="004"])=$controlnum]
117
+ return $hold
118
+ let $httpuri := fn:concat($baseuri , $controlnum)
119
+ let $recordset:= element marcxml:collection{$r,$holds}
120
+ let $r :=
121
+ try {
122
+ (:let $rdf := marcbib2bibframe:marcbib2bibframe($r,$httpuri):)
123
+ let $rdf := marcbib2bibframe:marcbib2bibframe($recordset,$httpuri)
124
+ let $o := $rdf/child::node()[fn:name()]
125
+ let $logmsg :=
126
+ element log:success {
127
+ attribute uri {$httpuri},
128
+ attribute datetime { fn:current-dateTime() }
129
+ }
130
+ return
131
+ element result {
132
+ element logmsg {$logmsg},
133
+ element rdf {$o}
134
+ }
135
+ } catch ($e) {
136
+ (: ML provides the full stack, but for brevity only take the spawning error. :)
137
+ let $stack1 := $e/mlerror:stack/mlerror:frame[1]
138
+ let $vars :=
139
+ for $v in $stack1/mlerror:variables/mlerror:variable
140
+ return
141
+ element log:error-variable {
142
+ element log:error-name { xs:string($v/mlerror:name) },
143
+ element log:error-value { xs:string($v/mlerror:value) }
144
+ }
145
+ let $logmsg :=
146
+ element log:error {
147
+ attribute uri {$httpuri},
148
+ attribute datetime { fn:current-dateTime() },
149
+ element log:error-details {
150
+ (: ML appears to put the actual err:* code in mlerror:name :)
151
+ element log:error-enginecode { xs:string($e/mlerror:code) },
152
+ element log:error-xcode { xs:string($e/mlerror:name) },
153
+ element log:error-msg { xs:string($e/mlerror:message) },
154
+ element log:error-description { xs:string($e/mlerror:format-string) },
155
+ element log:error-expression { xs:string($e/mlerror:expr) },
156
+ element log:error-file { xs:string($stack1/mlerror:uri) },
157
+ element log:error-line { xs:string($stack1/mlerror:line) },
158
+ element log:error-column { xs:string($stack1/mlerror:column) },
159
+ element log:error-operation { xs:string($stack1/mlerror:operation) }
160
+ },
161
+ element log:offending-record {
162
+ $r
163
+ }
164
+ }
165
+ return
166
+ element result {
167
+ element logmsg {$logmsg}
168
+ }
169
+ }
170
+ return
171
+ $r
172
+
173
+ let $rdfxml-raw :=
174
+ element rdf:RDF {
175
+ $result//rdf/child::node()[fn:name()]
176
+ }
177
+
178
+ let $rdfxml :=
179
+ if ( $serialization ne "rdfxml-raw" ) then
180
+ RDFXMLnested2flat:RDFXMLnested2flat($rdfxml-raw, $baseuri, $usebnodes)
181
+ else
182
+ $rdfxml-raw
183
+
184
+ let $endDT := fn:current-dateTime()
185
+ let $log :=
186
+ element log:log {
187
+ attribute engine {"MarkLogic"},
188
+ attribute start {$startDT},
189
+ attribute end {$endDT},
190
+ attribute source {$marcxmluri},
191
+ attribute total-submitted { fn:count($marcxml) },
192
+ attribute total-success { fn:count($marcxml) - fn:count($result//logmsg/log:error) },
193
+ attribute total-error { fn:count($result//logmsg/log:error) },
194
+ $result//logmsg/log:*
195
+ }
196
+
197
+ (: This might be a problem if run in a modules database. :)
198
+ let $logwritten :=
199
+ if ($writelog eq "true") then
200
+ xdmp:save($logfilename, $log,
201
+ <options xmlns="xdmp:save">
202
+ <indent>yes</indent>
203
+ <method>xml</method>
204
+ <output-encoding>utf-8</output-encoding>
205
+ </options>
206
+ )
207
+ else
208
+ ()
209
+
210
+ (:
211
+ For now, not injecting notice about an error into the JSON outputs.
212
+ There are a couple of ways to do it (one is a hack, the other is the right way)
213
+ but 1) will it break anything and 2) is there a need?
214
+ :)
215
+ let $response :=
216
+ if ($serialization eq "ntriples") then
217
+ if (fn:count($result//logmsg/log:error) > 0) then
218
+ fn:concat("# Errors encountered. View 'log' for details.", fn:codepoints-to-string(10), rdfxml2nt:rdfxml2ntriples($rdfxml))
219
+ else
220
+ rdfxml2nt:rdfxml2ntriples($rdfxml)
221
+ else if ($serialization eq "json") then
222
+ rdfxml2json:rdfxml2json($rdfxml)
223
+ else if ($serialization eq "exhibitJSON") then
224
+ bfRDFXML2exhibitJSON:bfRDFXML2exhibitJSON($rdfxml, $baseuri)
225
+ else if ($serialization eq "log") then
226
+ $log
227
+ else
228
+ if (fn:count($result//logmsg/log:error) > 0) then
229
+ element rdf:RDF {
230
+ comment {"Errors encountered. View 'log' for details."},
231
+ $rdfxml/*
232
+ }
233
+ else
234
+ $rdfxml
235
+
236
+ return $response
237
+
238
+
239
+
@@ -0,0 +1,134 @@
1
+ xquery version "1.0";
2
+
3
+ (:
4
+ : Module Name: MARC/XML BIB 2 BIBFRAME RDF using Saxon
5
+ :
6
+ : Module Version: 1.0
7
+ :
8
+ : Date: 2012 December 03
9
+ :
10
+ : Copyright: Public Domain
11
+ :
12
+ : Proprietary XQuery Extensions Used: saxon (Saxon)
13
+ :
14
+ : Xquery Specification: January 2007
15
+ :
16
+ : Module Overview: Transforms MARC/XML Bibliographic records
17
+ : to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
18
+ : N-triples, or JSON.
19
+ :
20
+ :)
21
+
22
+ (:~
23
+ : Transforms MARC/XML Bibliographic records
24
+ : to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
25
+ : N-triples, or JSON.
26
+ :
27
+ : adding holdings capability; allow <marcxml:collection> with multiple records,some holdings, related to bibs on 004
28
+
29
+ : @author Kevin Ford (kefo@loc.gov)
30
+ : @since December 17, 2014
31
+ : @version 1.0
32
+ :)
33
+
34
+ (: IMPORTED MODULES :)
35
+ import module namespace marcbib2bibframe = "info:lc/id-modules/marcbib2bibframe#" at "../modules/module.MARCXMLBIB-2-BIBFRAME.xqy";
36
+ import module namespace rdfxml2nt = "info:lc/id-modules/rdfxml2nt#" at "../modules/module.RDFXML-2-Ntriples.xqy";
37
+ import module namespace rdfxml2json = "info:lc/id-modules/rdfxml2json#" at "../modules/module.RDFXML-2-JSON.xqy";
38
+ import module namespace bfRDFXML2exhibitJSON = "info:lc/bf-modules/bfRDFXML2exhibitJSON#" at "../modules/module.RDFXML-2-ExhibitJSON.xqy";
39
+ import module namespace RDFXMLnested2flat = "info:lc/bf-modules/RDFXMLnested2flat#" at "../modules/module.RDFXMLnested-2-flat.xqy";
40
+
41
+ (: NAMESPACES :)
42
+
43
+ declare namespace marcxml = "http://www.loc.gov/MARC21/slim";
44
+ declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
45
+ declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";
46
+
47
+ declare namespace bf = "http://bibframe.org/vocab/";
48
+ declare namespace madsrdf = "http://www.loc.gov/mads/rdf/v1#";
49
+ declare namespace relators = "http://id.loc.gov/vocabulary/relators/";
50
+ declare namespace identifiers = "http://id.loc.gov/vocabulary/identifiers/";
51
+ declare namespace notes = "http://id.loc.gov/vocabulary/notes/";
52
+
53
+ declare option saxon:output "indent=yes";
54
+
55
+ (:~
56
+ : This variable is for the base uri for your Authorities/Concepts.
57
+ : It is the base URI for the rdf:about attribute.
58
+ :
59
+ :)
60
+ declare option saxon:default """http://example.org/""";
61
+ declare variable $baseuri as xs:string external;
62
+
63
+ (:~
64
+ : This variable determines whether bnodes should identify resources instead of
65
+ : http URIs, except for the "main" Work derived from each MARC record. At this time,
66
+ : the "main" Work must be identified by HTTP URI (using the $baseuri variable
67
+ : above).
68
+ :
69
+ :)
70
+ declare option saxon:default """false""";
71
+ declare variable $usebnodes as xs:string external;
72
+
73
+ (:~
74
+ : This variable is for the MARCXML location - externally defined.
75
+ :)
76
+ declare option saxon:default """NONE""";
77
+ declare variable $marcxmluri as xs:string external;
78
+
79
+ (:~
80
+ : This variable is for desired serialization. Expected values are: rdfxml (default), rdfxml-raw, ntriples, json, exhibitJSON
81
+ :)
82
+ declare option saxon:default """rdfxml""";
83
+ declare variable $serialization as xs:string external;
84
+
85
+ let $marcxml :=
86
+ if ($marcxmluri ne "NONE") then
87
+ fn:doc($marcxmluri)//marcxml:record
88
+ else
89
+ //marcxml:record
90
+
91
+ let $usebnodes:= if ($usebnodes="") then "false" else $usebnodes
92
+
93
+ let $resources :=
94
+ (:for $r in $marcxml:)
95
+ for $r in $marcxml[@type="Bibliographic" or fn:not(@type)]
96
+
97
+ let $controlnum := xs:string($r/marcxml:controlfield[@tag eq "001"][1])
98
+ let $holds:=
99
+ for $hold in $marcxml[fn:string(marcxml:controlfield[@tag="004"])=$controlnum]
100
+ return $hold
101
+
102
+ let $httpuri := fn:concat($baseuri , $controlnum)
103
+ let $recordset:= element marcxml:collection{$r,$holds}
104
+ let $bibframe := marcbib2bibframe:marcbib2bibframe($recordset,$httpuri)
105
+ return $bibframe/child::node()[fn:name()]
106
+
107
+ let $rdfxml-raw :=
108
+ element rdf:RDF {
109
+ $resources
110
+ }
111
+
112
+ let $rdfxml :=
113
+ if ( $serialization ne "rdfxml-raw" ) then
114
+ RDFXMLnested2flat:RDFXMLnested2flat($rdfxml-raw, $baseuri, $usebnodes)
115
+ else
116
+ $rdfxml-raw
117
+
118
+ let $response :=
119
+ if ($serialization eq "ntriples") then
120
+ rdfxml2nt:rdfxml2ntriples($rdfxml)
121
+ else if ($serialization eq "json") then
122
+ rdfxml2json:rdfxml2json($rdfxml)
123
+ else if ($serialization eq "exhibitJSON") then
124
+ bfRDFXML2exhibitJSON:bfRDFXML2exhibitJSON($rdfxml, $baseuri)
125
+ else
126
+ $rdfxml
127
+
128
+ return $response
129
+
130
+
131
+
132
+
133
+
134
+
@@ -0,0 +1,359 @@
1
+ xquery version "3.0";
2
+
3
+ (:
4
+ : Module Name: MARC/XML BIB 2 BIBFRAME RDF using Zorba
5
+ :
6
+ : Module Version: 1.0
7
+ :
8
+ : Date: 2012 December 03
9
+ :
10
+ : Copyright: Public Domain
11
+ :
12
+ : Proprietary XQuery Extensions Used: Zorba (expath)
13
+ :
14
+ : Xquery Specification: January 2007
15
+ :
16
+ : Module Overview: Transforms MARC/XML Bibliographic records
17
+ : to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
18
+ : N-triples, or JSON.
19
+ :
20
+ : adding holdings capability; allow <marcxml:collection> with multiple records, some holdings, related to bibs on 004
21
+ :
22
+ : Run: zorba -i -q file:///location/of/zorba.xqy -e marcxmluri:="http://location/of/marcxml.xml" -e serialization:="rdfxml" -e baseuri:="http://your-base-uri/" -e usebnodes:="true"
23
+ : Run: zorba -i -q file:///location/of/zorba.xqy -e marcxmluri:="../location/of/marcxml.xml" -e serialization:="rdfxml" -e baseuri:="http://your-base-uri/" -e usebnodes:="true"
24
+ :)
25
+
26
+ (:~
27
+ : Transforms MARC/XML Bibliographic records
28
+ : to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
29
+ : N-triples, or JSON.
30
+ :
31
+ : @author Nate Trail (ntra@loc.gov)
32
+ : @author Kevin Ford (kefo@loc.gov)
33
+ : @since December 17, 2014
34
+ : @version 1.0
35
+ :)
36
+
37
+ (: IMPORTED MODULES :)
38
+ import module namespace http = "http://zorba.io/modules/http-client";
39
+ import module namespace file = "http://expath.org/ns/file";
40
+ import module namespace parsexml = "http://zorba.io/modules/xml";
41
+ import schema namespace parseoptions = "http://zorba.io/modules/xml-options";
42
+
43
+ import module namespace marcbib2bibframe = "info:lc/id-modules/marcbib2bibframe#" at "../modules/module.MARCXMLBIB-2-BIBFRAME.xqy";
44
+ import module namespace rdfxml2nt = "info:lc/id-modules/rdfxml2nt#" at "../modules/module.RDFXML-2-Ntriples.xqy";
45
+ import module namespace rdfxml2json = "info:lc/id-modules/rdfxml2json#" at "../modules/module.RDFXML-2-JSON.xqy";
46
+ import module namespace bfRDFXML2exhibitJSON = "info:lc/bf-modules/bfRDFXML2exhibitJSON#" at "../modules/module.RDFXML-2-ExhibitJSON.xqy";
47
+ import module namespace RDFXMLnested2flat = "info:lc/bf-modules/RDFXMLnested2flat#" at "../modules/module.RDFXMLnested-2-flat.xqy";
48
+
49
+ (: NAMESPACES :)
50
+ declare namespace marcxml = "http://www.loc.gov/MARC21/slim";
51
+ declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
52
+ declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";
53
+
54
+ declare namespace bf = "http://bibframe.org/vocab/";
55
+ declare namespace madsrdf = "http://www.loc.gov/mads/rdf/v1#";
56
+ declare namespace relators = "http://id.loc.gov/vocabulary/relators/";
57
+ declare namespace identifiers = "http://id.loc.gov/vocabulary/identifiers/";
58
+ declare namespace notes = "http://id.loc.gov/vocabulary/notes/";
59
+
60
+ declare namespace an = "http://zorba.io/annotations";
61
+ declare namespace httpexpath = "http://expath.org/ns/http-client";
62
+ declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
63
+
64
+ declare namespace log = "info:lc/marc2bibframe/logging#";
65
+ declare namespace err = "http://www.w3.org/2005/xqt-errors";
66
+ declare namespace zerror = "http://zorba.io/errors";
67
+
68
+ (:~
69
+ : This variable is for the base uri for your Authorities/Concepts.
70
+ : It is the base URI for the rdf:about attribute.
71
+ :
72
+ :)
73
+ declare variable $baseuri as xs:string external := "http://example.org/";
74
+
75
+ (:~
76
+ : This variable determines whether bnodes should identify resources instead of
77
+ : http URIs, except for the "main" Work derived from each MARC record. At this time,
78
+ : the "main" Work must be identified by HTTP URI (using the $baseuri variable
79
+ : above).
80
+ :
81
+ :)
82
+ declare variable $usebnodes as xs:string external := "false";
83
+
84
+ (:~
85
+ : This variable is for the MARCXML location - externally defined.
86
+ :)
87
+ declare variable $marcxmluri as xs:string external;
88
+
89
+ (:~
90
+ : This variable is for desired serialization. Expected values are: rdfxml (default), rdfxml-raw, ntriples, json, exhibitJSON, log
91
+ :)
92
+ declare variable $serialization as xs:string external := "rdfxml";
93
+
94
+ (:~
95
+ : If set to "true", the flattened RDF/XML is post-processed to try to resolve labels to authority URIs (not applied when serialization is rdfxml-raw). Default is "false".
96
+ :)
97
+ declare variable $resolveLabelsWithID as xs:string external := "false";
98
+
99
+ (:~
100
+ : If set to "true" will write log file to directory.
101
+ :)
102
+ declare variable $writelog as xs:string external := "false";
103
+
104
+ (:~
105
+ : Directory for log files. MUST end with a slash.
106
+ :)
107
+ declare variable $logdir as xs:string external := "";
108
+
109
+ (:~
110
+ Performs an HTTP GET against the id.loc.gov authorities label URL without following redirects (request is sent with "status-only": true)
111
+
112
+ $label as xs:string is the label to look up (URI-encoded before it is placed in the request URL)
113
+ $scheme as xs:string is the scheme, used as a path segment of the request URL
114
+ :)
115
+ declare %an:sequential function local:http-get(
116
+ $label as xs:string,
117
+ $scheme as xs:string
118
+ )
119
+ {
120
+ let $l := fn:encode-for-uri($label) (: percent-encode the label so it can be embedded in the href below :)
121
+ (: Disabled EXPath http-client form of the same request, kept for reference:
122
+ let $request :=
123
+ httpexpath:send-request(
124
+ <httpexpath:request
125
+ method="GET"
126
+ href="http://id.loc.gov/authorities/{$scheme}/label/{$l}"
127
+ follow-redirect="false"/>
128
+ )
129
+ :)
130
+ let $options := fn:concat('{
131
+ "method": "GET",
132
+ "href": "http://id.loc.gov/authorities/', $scheme, '/label/', $l , '",
133
+ "options":
134
+ {
135
+ "status-only": true,
136
+ "override-media-type": "text/plain",
137
+ "follow-redirect": false,
138
+ "timeout": 5,
139
+ "user-agent": "MARC2BIBFRAME"
140
+ }
141
+ }')
142
+ let $request := http:send-request(jn:parse-json($options)) (: parse the JSON text assembled above and send the request :)
143
+ return $request (: the response object; local:resolve-labels reads its "status" and "headers" members :)
144
+ };
145
+
146
+ (:~
147
+ Outputs a copy of a resource, replacing the verbose hasAuthority property
148
+ with a single bf:hasAuthority element carrying a simple rdf:resource pointer
149
+
150
+ $r as element() is the resource to copy
151
+ $authuri as xs:string is the authority URI written to rdf:resource
152
+ :)
153
+ declare %an:nondeterministic function local:generate-resource(
154
+ $r as element(),
155
+ $authuri as xs:string
156
+ )
157
+ {
158
+ element { fn:name($r) } { (: rebuild the element under its original lexical name :)
159
+ $r/@*, (: keep every attribute of the original :)
160
+ $r/*[fn:name() ne "bf:hasAuthority"], (: keep all children except hasAuthority; NOTE(review): compares the prefixed name, so this assumes the input uses the "bf" prefix - confirm :)
161
+ element bf:hasAuthority {
162
+ attribute rdf:resource { $authuri }
163
+ }
164
+ }
165
+ };
166
+
167
+
168
+ (:~
169
+ Tries to resolve Labels to URIs via local:http-get; resources that cannot be resolved are passed through unchanged
170
+
171
+ $flatrdfxml as element(rdf:RDF) is the RDF/XML whose child resources are examined
172
+ Returns a new rdf:RDF element containing the (possibly rewritten) resources
173
+ :)
174
+ declare %an:sequential function local:resolve-labels(
175
+ $flatrdfxml as element(rdf:RDF)
176
+ )
177
+ {
178
+ let $resources :=
179
+ for $r in $flatrdfxml/*
180
+ let $n := fn:local-name($r)
181
+ let $scheme := (: path segment passed to local:http-get, chosen from the resource's local name :)
182
+ if ( fn:matches($n, "Topic|TemporalConcept") ) then
183
+ "subjects"
184
+ else
185
+ "names"
186
+ return
187
+ if ( fn:matches($n, "Person|Organization|Place|Meeting|Family|Topic|TemporalConcept") ) then (: only these resource types are looked up; the regex is unanchored, so any local name containing one of them matches :)
188
+ let $label := ($r/bf:authorizedAccessPoint, $r/bf:label)[1] (: prefer authorizedAccessPoint, fall back to label :)
189
+ let $label := fn:normalize-space(xs:string($label))
190
+ let $req1 := local:http-get($label, $scheme)
191
+ let $resource :=
192
+ if ($req1("status") eq 302) then (: a 302 is treated as a match; the authority URI is read from the X-URI response header :)
193
+ let $authuri := xs:string($req1("headers")("X-URI"))
194
+ return local:generate-resource($r, $authuri)
195
+ else if (
196
+ $req1("status") ne 302 and
197
+ fn:ends-with($label, ".")
198
+ ) then (: no match and the label ends with a period: retry once without the trailing period :)
199
+ let $l := fn:substring($label, 1, fn:string-length($label)-1)
200
+ let $req2 := local:http-get($l, $scheme)
201
+ return
202
+ if ($req2("status") eq 302) then
203
+ let $authuri := xs:string($req2("headers")("X-URI"))
204
+ return local:generate-resource($r, $authuri)
205
+ else
206
+ (: There was no match or some other message, keep moving :)
207
+ $r
208
+ else
209
+ $r
210
+ return $resource
211
+
212
+ else
213
+ $r
214
+
215
+ return <rdf:RDF>{$resources}</rdf:RDF> (: wrap the processed resources in a fresh rdf:RDF root :)
216
+ };
217
+
218
+
219
+
220
+ let $startDT := fn:current-dateTime()
221
+ let $logfilename := fn:replace(fn:substring-before(xs:string($startDT), "."), "-|:", "")
222
+ let $logfilename := fn:concat($logdir, $logfilename, '.log.xml')
223
+
224
+ let $marcxml :=
225
+ if ( fn:starts-with($marcxmluri, "http://" ) or fn:starts-with($marcxmluri, "https://" ) ) then
226
+ let $json := http:get($marcxmluri)
227
+ return parsexml:parse($json("body")("content"), <parseoptions:options/>)
228
+ else
229
+ let $raw-data :=
230
+ if ( fn:starts-with($marcxmluri, "raw:" ) ) then
231
+ fn:substring($marcxmluri, 5)
232
+ else
233
+ file:read-text($marcxmluri)
234
+ let $mxml := parsexml:parse(
235
+ $raw-data,
236
+ <parseoptions:options />
237
+ )
238
+ return $mxml
239
+ let $marcxml := $marcxml//marcxml:record
240
+
241
+ let $result :=
242
+ (:for $r in $marcxml:)
243
+ for $r in $marcxml[@type="Bibliographic" or fn:not(@type)]
244
+ let $controlnum := xs:string($r/marcxml:controlfield[@tag eq "001"][1])
245
+ let $holds:=
246
+ for $hold in $marcxml[fn:string(marcxml:controlfield[@tag="004"])=$controlnum]
247
+ return $hold
248
+ let $httpuri := fn:concat($baseuri , $controlnum)
249
+ let $recordset:= element marcxml:collection{$r,$holds}
250
+ let $r :=
251
+ try {
252
+ let $rdf := marcbib2bibframe:marcbib2bibframe($recordset,$httpuri)
253
+ let $o := $rdf/child::node()[fn:name()]
254
+ let $logmsg :=
255
+ element log:success {
256
+ attribute uri {$httpuri},
257
+ attribute datetime { fn:current-dateTime() }
258
+ }
259
+ return
260
+ element result {
261
+ element logmsg {$logmsg},
262
+ element rdf {$o}
263
+ }
264
+ } catch * {
265
+ (: Could get entire stack trace from Zorba, but omitting for now. :)
266
+ let $stack1 := $zerror:stack-trace
267
+ let $logmsg :=
268
+ element log:error {
269
+ attribute uri {$httpuri},
270
+ attribute datetime { fn:current-dateTime() },
271
+ element log:error-details {
272
+ element log:error-xcode { xs:string($err:code) },
273
+ element log:error-description { xs:string($err:description) },
274
+ element log:error-file { xs:string($err:module) },
275
+ element log:error-line { xs:string($err:line-number) },
276
+ element log:error-column { xs:string($err:column-number) }
277
+ (: element log:error-stack { $stack1 } :)
278
+ },
279
+ element log:offending-record {
280
+ $r
281
+ }
282
+ }
283
+ return
284
+ element result {
285
+ element logmsg {$logmsg}
286
+ }
287
+ }
288
+ return
289
+ $r
290
+
291
+ let $rdfxml-raw :=
292
+ element rdf:RDF {
293
+ $result//rdf/child::node()[fn:name()]
294
+ }
295
+
296
+ let $rdfxml :=
297
+ if ( $serialization ne "rdfxml-raw" ) then
298
+ let $flatrdfxml := RDFXMLnested2flat:RDFXMLnested2flat($rdfxml-raw, $baseuri, $usebnodes)
299
+ return
300
+ if ($resolveLabelsWithID eq "true") then
301
+ local:resolve-labels($flatrdfxml)
302
+ else
303
+ $flatrdfxml
304
+ else
305
+ $rdfxml-raw
306
+
307
+ let $endDT := fn:current-dateTime()
308
+ let $log := (: run summary followed by every per-record log:success / log:error entry :)
309
+ element log:log {
310
+ attribute engine {"Zorba"}, (: this driver runs under Zorba; the value was previously copied over as "MarkLogic" :)
311
+ attribute start {$startDT},
312
+ attribute end {$endDT},
313
+ attribute source {$marcxmluri},
314
+ attribute total-submitted { fn:count($marcxml) }, (: counts every marcxml:record in the input :)
315
+ attribute total-success { fn:count($marcxml) - fn:count($result//logmsg/log:error) }, (: submitted minus the records that logged an error :)
316
+ attribute total-error { fn:count($result//logmsg/log:error) },
317
+ $result//logmsg/log:*
318
+ }
319
+
320
+ let $logwritten :=
321
+ if ($writelog eq "true") then
322
+ file:write-text($logfilename, serialize($log,
323
+ <output:serialization-parameters>
324
+ <output:indent value="yes"/>
325
+ <output:method value="xml"/>
326
+ <output:omit-xml-declaration value="no"/>
327
+ </output:serialization-parameters>)
328
+ )
329
+ else
330
+ ()
331
+
332
+ (:
333
+ For now, not injecting notice about an error into the JSON outputs.
334
+ There are a couple of ways to do it (one is a hack, the other is the right way)
335
+ but 1) will it break anything and 2) is there a need?
336
+ :)
337
+ let $response :=
338
+ if ($serialization eq "ntriples") then
339
+ if (fn:count($result//logmsg/log:error) > 0) then
340
+ fn:concat("# Errors encountered. View 'log' for details.", fn:codepoints-to-string(10), rdfxml2nt:rdfxml2ntriples($rdfxml))
341
+ else
342
+ rdfxml2nt:rdfxml2ntriples($rdfxml)
343
+ else if ($serialization eq "json") then
344
+ rdfxml2json:rdfxml2json($rdfxml)
345
+ else if ($serialization eq "exhibitJSON") then
346
+ bfRDFXML2exhibitJSON:bfRDFXML2exhibitJSON($rdfxml, $baseuri)
347
+ else if ($serialization eq "log") then
348
+ $log
349
+ else
350
+ if (fn:count($result//logmsg/log:error) > 0) then
351
+ element rdf:RDF {
352
+ comment {"Errors encountered. View 'log' for details."},
353
+ $rdfxml/*
354
+ }
355
+ else
356
+ $rdfxml
357
+
358
+ return $response
359
+