ld4l-works_rdf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +25 -0
- data/.travis.yml +14 -0
- data/CHANGES.md +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +170 -0
- data/Rakefile +2 -0
- data/ld4l-works_rdf.gemspec +42 -0
- data/lib/ld4l/works_rdf.rb +131 -0
- data/lib/ld4l/works_rdf/configuration.rb +41 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_identifier.rb +17 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_instance.rb +23 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_organization.rb +16 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_person.rb +16 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_place.rb +16 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_provider.rb +18 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_title.rb +18 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_work.rb +16 -0
- data/lib/ld4l/works_rdf/models/bibo/bibo_book.rb +19 -0
- data/lib/ld4l/works_rdf/models/bibo/bibo_document.rb +14 -0
- data/lib/ld4l/works_rdf/models/bibo/vivo_authorship.rb +15 -0
- data/lib/ld4l/works_rdf/models/bibo/vivo_book.rb +18 -0
- data/lib/ld4l/works_rdf/models/generic_work.rb +13 -0
- data/lib/ld4l/works_rdf/models/schema/oclc_schema_book.rb +16 -0
- data/lib/ld4l/works_rdf/models/schema/schema_book.rb +23 -0
- data/lib/ld4l/works_rdf/models/schema/schema_person.rb +17 -0
- data/lib/ld4l/works_rdf/models/schema/schema_publisher.rb +15 -0
- data/lib/ld4l/works_rdf/models/work_metadata.rb +212 -0
- data/lib/ld4l/works_rdf/services/attempt_generic_metadata_extraction.rb +41 -0
- data/lib/ld4l/works_rdf/services/conversion_services/get_rdfxml_from_marcxml.rb +44 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.ErrorCodes.xqy +56 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXML-2-MADSRDF.xqy +1702 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXML-2-RecordInfoRDF.xqy +216 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXMLBIB-2-BIBFRAME.xqy +140 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXMLBIB-BFUtils.xqy +3287 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-2-BIBFRAME-Shared.xqy +4112 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-Default-2-BF.xqy +61 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-NotatedMusic-2-BF.xqy +105 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-ExhibitJSON.xqy +119 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-JSON.xqy +193 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-Ntriples.xqy +276 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXMLnested-2-flat.xqy +380 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/ml.xqy +239 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/saxon.xqy +134 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba.xqy +359 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba2-0.xqy +249 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba3-0.xqy +362 -0
- data/lib/ld4l/works_rdf/services/conversion_services/saxon/saxon9he.jar +0 -0
- data/lib/ld4l/works_rdf/services/get_metadata_from_marcxml_uri.rb +43 -0
- data/lib/ld4l/works_rdf/services/get_metadata_from_oclc_uri.rb +25 -0
- data/lib/ld4l/works_rdf/services/get_metadata_from_solr_query.rb +29 -0
- data/lib/ld4l/works_rdf/services/get_metadata_from_vivo_uri.rb +25 -0
- data/lib/ld4l/works_rdf/services/get_model_from_uri.rb +62 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_bibframe_models.rb +60 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_bibo_model.rb +42 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_generic_model.rb +41 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_oclc_model.rb +42 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_solr_doc.rb +67 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_vivo_model.rb +45 -0
- data/lib/ld4l/works_rdf/services/metadata_services/set_error_in_metadata.rb +27 -0
- data/lib/ld4l/works_rdf/services/model_services/populate_bibframe_models_from_repository.rb +46 -0
- data/lib/ld4l/works_rdf/services/model_services/populate_generic_model_from_repository.rb +30 -0
- data/lib/ld4l/works_rdf/services/model_services/populate_oclc_model_from_repository.rb +27 -0
- data/lib/ld4l/works_rdf/services/model_services/populate_vivo_model_from_repository.rb +27 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/get_marcxml_from_uri.rb +35 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/get_rdfxml_from_uri.rb +37 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/get_solr_results_from_solr_query.rb +35 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/get_turtle_from_uri.rb +37 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/response_header.rb +51 -0
- data/lib/ld4l/works_rdf/services/repository_services/populate_graph_from_rdfxml.rb +22 -0
- data/lib/ld4l/works_rdf/services/repository_services/populate_graph_from_turtle.rb +21 -0
- data/lib/ld4l/works_rdf/services/repository_services/populate_repository_from_graph.rb +25 -0
- data/lib/ld4l/works_rdf/version.rb +5 -0
- data/lib/ld4l/works_rdf/vocab/bf.rb +29 -0
- data/lib/ld4l/works_rdf/vocab/bgn.rb +5 -0
- data/lib/ld4l/works_rdf/vocab/bibo.rb +10 -0
- data/lib/ld4l/works_rdf/vocab/library.rb +6 -0
- data/lib/ld4l/works_rdf/vocab/vitro.rb +5 -0
- data/lib/ld4l/works_rdf/vocab/vivo.rb +11 -0
- data/spec/ld4l/works_rdf/configuration_spec.rb +166 -0
- data/spec/ld4l/works_rdf/models/books/vivo_book_rdf_spec.rb +267 -0
- data/spec/ld4l/works_rdf/services/get_metadata_from_uri_spec.rb +39 -0
- data/spec/ld4l/works_rdf/services/get_model_from_uri_spec.rb +34 -0
- data/spec/ld4l/works_rdf_spec.rb +53 -0
- data/spec/spec_helper.rb +26 -0
- metadata +321 -0
@@ -0,0 +1,239 @@
|
|
1
|
+
xquery version "1.0-ml";
|
2
|
+
|
3
|
+
(:
|
4
|
+
: Module Name: MARC/XML BIB 2 BIBFRAME RDF using MarkLogic
|
5
|
+
:
|
6
|
+
: Module Version: 1.0
|
7
|
+
:
|
8
|
+
: Date: 2012 December 03
|
9
|
+
:
|
10
|
+
: Copyright: Public Domain
|
11
|
+
:
|
12
|
+
: Proprietary XQuery Extensions Used: xdmp (MarkLogic)
|
13
|
+
:
|
14
|
+
: Xquery Specification: January 2007
|
15
|
+
:
|
16
|
+
: Module Overview: Transforms MARC/XML Bibliographic records
|
17
|
+
: to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
|
18
|
+
: N-triples, or JSON.
|
19
|
+
:
|
20
|
+
:)
|
21
|
+
|
22
|
+
(:~
|
23
|
+
: Transforms MARC/XML Bibliographic records
|
24
|
+
: to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
|
25
|
+
: N-triples, or JSON.
|
26
|
+
:
|
27
|
+
: adding holdings capability; allow <marcxml:collection> with multiple records, some holdings, related to bibs on 004
|
28
|
+
|
29
|
+
: @author Kevin Ford (kefo@loc.gov)
|
30
|
+
|
31
|
+
: @since December 17, 2014
|
32
|
+
: @version 1.0
|
33
|
+
:)
|
34
|
+
|
35
|
+
(: IMPORTED MODULES :)
|
36
|
+
import module namespace marcbib2bibframe = "info:lc/id-modules/marcbib2bibframe#" at "../modules/module.MARCXMLBIB-2-BIBFRAME.xqy";
|
37
|
+
import module namespace rdfxml2nt = "info:lc/id-modules/rdfxml2nt#" at "../modules/module.RDFXML-2-Ntriples.xqy";
|
38
|
+
import module namespace rdfxml2json = "info:lc/id-modules/rdfxml2json#" at "../modules/module.RDFXML-2-JSON.xqy";
|
39
|
+
import module namespace bfRDFXML2exhibitJSON = "info:lc/bf-modules/bfRDFXML2exhibitJSON#" at "../modules/module.RDFXML-2-ExhibitJSON.xqy";
|
40
|
+
import module namespace RDFXMLnested2flat = "info:lc/bf-modules/RDFXMLnested2flat#" at "../modules/module.RDFXMLnested-2-flat.xqy";
|
41
|
+
|
42
|
+
(: NAMESPACES :)
|
43
|
+
declare namespace xdmp = "http://marklogic.com/xdmp";
|
44
|
+
|
45
|
+
declare namespace marcxml = "http://www.loc.gov/MARC21/slim";
|
46
|
+
declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
47
|
+
declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";
|
48
|
+
|
49
|
+
declare namespace bf = "http://bibframe.org/vocab/";
|
50
|
+
declare namespace madsrdf = "http://www.loc.gov/mads/rdf/v1#";
|
51
|
+
declare namespace relators = "http://id.loc.gov/vocabulary/relators/";
|
52
|
+
declare namespace identifiers = "http://id.loc.gov/vocabulary/identifiers/";
|
53
|
+
declare namespace notes = "http://id.loc.gov/vocabulary/notes/";
|
54
|
+
|
55
|
+
declare namespace log = "info:lc/marc2bibframe/logging#";
|
56
|
+
declare namespace mlerror = "http://marklogic.com/xdmp/error";
|
57
|
+
|
58
|
+
declare option xdmp:output "indent-untyped=yes" ;
|
59
|
+
|
60
|
+
(:~
|
61
|
+
: This variable is for the base uri for your Authorities/Concepts.
|
62
|
+
: It is the base URI for the rdf:about attribute.
|
63
|
+
:
|
64
|
+
:)
|
65
|
+
declare variable $baseuri as xs:string := xdmp:get-request-field("baseuri","http://example.org/");
|
66
|
+
|
67
|
+
(:~
|
68
|
+
: This variable determines whether bnodes should identify resources instead of
|
69
|
+
: http URIs, except the "main" Work derived from each MARC record. At this time,
|
70
|
+
: the "main" work must be referenced by HTTP URI (using the $baseuri variable
|
71
|
+
: above).
|
72
|
+
:
|
73
|
+
:)
|
74
|
+
declare variable $usebnodes as xs:string := xdmp:get-request-field("usebnodes","false");
|
75
|
+
|
76
|
+
(:~
|
77
|
+
: This variable is for the MARCXML location - externally defined.
|
78
|
+
:)
|
79
|
+
declare variable $marcxmluri as xs:string := xdmp:get-request-field("marcxmluri","");
|
80
|
+
|
81
|
+
(:~
|
82
|
+
: This variable is for desired serialization. Expected values are: rdfxml (default), rdfxml-raw, ntriples, json, exhibitJSON, log
|
83
|
+
:)
|
84
|
+
declare variable $serialization as xs:string := xdmp:get-request-field("serialization","rdfxml");
|
85
|
+
|
86
|
+
(:~
|
87
|
+
: If set to "true" will write log file to directory.
|
88
|
+
:)
|
89
|
+
declare variable $writelog as xs:string := xdmp:get-request-field("writelog","false");
|
90
|
+
|
91
|
+
(:~
|
92
|
+
: Directory for log files. MUST end with a slash.
|
93
|
+
:)
|
94
|
+
declare variable $logdir as xs:string := xdmp:get-request-field("logdir","");
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
(:
    Main query body (MarkLogic driver).

    1. Loads the MARC/XML document at $marcxmluri and selects every marcxml:record.
    2. Converts each bibliographic record, together with the holdings records that
       point at it through controlfield 004, to BIBFRAME RDF/XML. Conversion errors
       are caught per record and turned into log:error entries.
    3. Optionally flattens the RDF/XML, builds a log:log summary, optionally saves
       the log to $logdir, and returns the requested serialization.
:)
let $startDT := fn:current-dateTime()
(: Log file name: the start timestamp up to the first ".", with "-" and ":" removed. :)
let $logfilename := fn:replace(fn:substring-before(xs:string($startDT), "."), "-|:", "")
let $logfilename := fn:concat($logdir, $logfilename, '.log.xml')

let $marcxml :=
    xdmp:document-get(
        $marcxmluri,
        <options xmlns="xdmp:document-get">
            <format>xml</format>
        </options>
    )

let $marcxml := $marcxml//marcxml:record

let $result :=
    (: Only bibliographic records (or records without @type) drive the conversion. :)
    for $r in $marcxml[@type="Bibliographic" or fn:not(@type)]
    let $controlnum := xs:string($r/marcxml:controlfield[@tag eq "001"][1])
    (:
        Holdings are the records whose 004 equals this record's 001.
        A general comparison on the 004 node(s) is used on purpose: a record with
        no 004 never matches (even when $controlnum is empty), and a record with
        a repeated 004 does not raise a cardinality error.
    :)
    let $holds := $marcxml[marcxml:controlfield[@tag eq "004"] = $controlnum]
    let $httpuri := fn:concat($baseuri , $controlnum)
    let $recordset := element marcxml:collection { $r, $holds }
    let $converted :=
        try {
            let $rdf := marcbib2bibframe:marcbib2bibframe($recordset, $httpuri)
            (: Keep only named children of the returned wrapper. :)
            let $o := $rdf/child::node()[fn:name()]
            let $logmsg :=
                element log:success {
                    attribute uri {$httpuri},
                    attribute datetime { fn:current-dateTime() }
                }
            return
                element result {
                    element logmsg {$logmsg},
                    element rdf {$o}
                }
        } catch ($e) {
            (: ML provides the full stack, but for brevity only take the spawning error. :)
            let $stack1 := $e/mlerror:stack/mlerror:frame[1]
            let $vars :=
                for $v in $stack1/mlerror:variables/mlerror:variable
                return
                    element log:error-variable {
                        element log:error-name { xs:string($v/mlerror:name) },
                        element log:error-value { xs:string($v/mlerror:value) }
                    }
            let $logmsg :=
                element log:error {
                    attribute uri {$httpuri},
                    attribute datetime { fn:current-dateTime() },
                    element log:error-details {
                        (: ML appears to be the actual err:* code in mlerror:name :)
                        element log:error-enginecode { xs:string($e/mlerror:code) },
                        element log:error-xcode { xs:string($e/mlerror:name) },
                        element log:error-msg { xs:string($e/mlerror:message) },
                        element log:error-description { xs:string($e/mlerror:format-string) },
                        element log:error-expression { xs:string($e/mlerror:expr) },
                        element log:error-file { xs:string($stack1/mlerror:uri) },
                        element log:error-line { xs:string($stack1/mlerror:line) },
                        element log:error-column { xs:string($stack1/mlerror:column) },
                        element log:error-operation { xs:string($stack1/mlerror:operation) }
                    },
                    (: $r is the MARC record being converted when the error was raised. :)
                    element log:offending-record {
                        $r
                    }
                }
            return
                element result {
                    element logmsg {$logmsg}
                }
        }
    return
        $converted

let $rdfxml-raw :=
    element rdf:RDF {
        $result//rdf/child::node()[fn:name()]
    }

(: Every serialization except "rdfxml-raw" works on the flattened RDF/XML. :)
let $rdfxml :=
    if ( $serialization ne "rdfxml-raw" ) then
        RDFXMLnested2flat:RDFXMLnested2flat($rdfxml-raw, $baseuri, $usebnodes)
    else
        $rdfxml-raw

let $endDT := fn:current-dateTime()
let $log :=
    element log:log {
        attribute engine {"MarkLogic"},
        attribute start {$startDT},
        attribute end {$endDT},
        attribute source {$marcxmluri},
        (: total-submitted counts every record in the input, holdings included. :)
        attribute total-submitted { fn:count($marcxml) },
        (: Count actual successes; holdings and skipped records are not conversions. :)
        attribute total-success { fn:count($result//logmsg/log:success) },
        attribute total-error { fn:count($result//logmsg/log:error) },
        $result//logmsg/log:*
    }

(: This might be a problem if run in a modules database. :)
let $logwritten :=
    if ($writelog eq "true") then
        xdmp:save($logfilename, $log,
            <options xmlns="xdmp:save">
                <indent>yes</indent>
                <method>xml</method>
                <output-encoding>utf-8</output-encoding>
            </options>
        )
    else
        ()

(:
    For now, not injecting notice about an error into the JSON outputs.
    There are a couple of ways to do it (one is a hack, the other is the right way)
    but 1) will it break anything and 2) is there a need?
:)
let $response :=
    if ($serialization eq "ntriples") then
        if (fn:count($result//logmsg/log:error) > 0) then
            fn:concat("# Errors encountered.  View 'log' for details.", fn:codepoints-to-string(10), rdfxml2nt:rdfxml2ntriples($rdfxml))
        else
            rdfxml2nt:rdfxml2ntriples($rdfxml)
    else if ($serialization eq "json") then
        rdfxml2json:rdfxml2json($rdfxml)
    else if ($serialization eq "exhibitJSON") then
        bfRDFXML2exhibitJSON:bfRDFXML2exhibitJSON($rdfxml, $baseuri)
    else if ($serialization eq "log") then
        $log
    else
        if (fn:count($result//logmsg/log:error) > 0) then
            element rdf:RDF {
                comment {"Errors encountered.  View 'log' for details."},
                $rdfxml/*
            }
        else
            $rdfxml

return $response
|
237
|
+
|
238
|
+
|
239
|
+
|
@@ -0,0 +1,134 @@
|
|
1
|
+
xquery version "1.0";
|
2
|
+
|
3
|
+
(:
|
4
|
+
: Module Name: MARC/XML BIB 2 BIBFRAME RDF using Saxon
|
5
|
+
:
|
6
|
+
: Module Version: 1.0
|
7
|
+
:
|
8
|
+
: Date: 2012 December 03
|
9
|
+
:
|
10
|
+
: Copyright: Public Domain
|
11
|
+
:
|
12
|
+
: Proprietary XQuery Extensions Used: saxon (Saxon)
|
13
|
+
:
|
14
|
+
: Xquery Specification: January 2007
|
15
|
+
:
|
16
|
+
: Module Overview: Transforms MARC/XML Bibliographic records
|
17
|
+
: to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
|
18
|
+
: N-triples, or JSON.
|
19
|
+
:
|
20
|
+
:)
|
21
|
+
|
22
|
+
(:~
|
23
|
+
: Transforms MARC/XML Bibliographic records
|
24
|
+
: to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
|
25
|
+
: N-triples, or JSON.
|
26
|
+
:
|
27
|
+
: adding holdings capability; allow <marcxml:collection> with multiple records,some holdings, related to bibs on 004
|
28
|
+
|
29
|
+
: @author Kevin Ford (kefo@loc.gov)
|
30
|
+
: @since December 17, 2014
|
31
|
+
: @version 1.0
|
32
|
+
:)
|
33
|
+
|
34
|
+
(: IMPORTED MODULES :)
|
35
|
+
import module namespace marcbib2bibframe = "info:lc/id-modules/marcbib2bibframe#" at "../modules/module.MARCXMLBIB-2-BIBFRAME.xqy";
|
36
|
+
import module namespace rdfxml2nt = "info:lc/id-modules/rdfxml2nt#" at "../modules/module.RDFXML-2-Ntriples.xqy";
|
37
|
+
import module namespace rdfxml2json = "info:lc/id-modules/rdfxml2json#" at "../modules/module.RDFXML-2-JSON.xqy";
|
38
|
+
import module namespace bfRDFXML2exhibitJSON = "info:lc/bf-modules/bfRDFXML2exhibitJSON#" at "../modules/module.RDFXML-2-ExhibitJSON.xqy";
|
39
|
+
import module namespace RDFXMLnested2flat = "info:lc/bf-modules/RDFXMLnested2flat#" at "../modules/module.RDFXMLnested-2-flat.xqy";
|
40
|
+
|
41
|
+
(: NAMESPACES :)
|
42
|
+
|
43
|
+
declare namespace marcxml = "http://www.loc.gov/MARC21/slim";
|
44
|
+
declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
45
|
+
declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";
|
46
|
+
|
47
|
+
declare namespace bf = "http://bibframe.org/vocab/";
|
48
|
+
declare namespace madsrdf = "http://www.loc.gov/mads/rdf/v1#";
|
49
|
+
declare namespace relators = "http://id.loc.gov/vocabulary/relators/";
|
50
|
+
declare namespace identifiers = "http://id.loc.gov/vocabulary/identifiers/";
|
51
|
+
declare namespace notes = "http://id.loc.gov/vocabulary/notes/";
|
52
|
+
|
53
|
+
declare option saxon:output "indent=yes";
|
54
|
+
|
55
|
+
(:~
|
56
|
+
: This variable is for the base uri for your Authorities/Concepts.
|
57
|
+
: It is the base URI for the rdf:about attribute.
|
58
|
+
:
|
59
|
+
:)
|
60
|
+
declare option saxon:default """http://example.org/""";
|
61
|
+
declare variable $baseuri as xs:string external;
|
62
|
+
|
63
|
+
(:~
|
64
|
+
: This variable determines whether bnodes should identify resources instead of
|
65
|
+
: http URIs, except for the "main" Work derived from each MARC record. At this time,
|
66
|
+
: the "main" Work must be identified by HTTP URI (using the $baseuri variable
|
67
|
+
: above).
|
68
|
+
:
|
69
|
+
:)
|
70
|
+
declare option saxon:default """false""";
|
71
|
+
declare variable $usebnodes as xs:string external;
|
72
|
+
|
73
|
+
(:~
|
74
|
+
: This variable is for the MARCXML location - externally defined.
|
75
|
+
:)
|
76
|
+
declare option saxon:default """NONE""";
|
77
|
+
declare variable $marcxmluri as xs:string external;
|
78
|
+
|
79
|
+
(:~
|
80
|
+
: This variable is for desired serialization. Expected values are: rdfxml (default), rdfxml-raw, ntriples, json, exhibitJSON
|
81
|
+
:)
|
82
|
+
declare option saxon:default """rdfxml""";
|
83
|
+
declare variable $serialization as xs:string external;
|
84
|
+
|
85
|
+
(:
    Main query body (Saxon driver).

    Reads marcxml:record elements either from the document at $marcxmluri or,
    when $marcxmluri is "NONE", from the context item. Each bibliographic record
    is converted to BIBFRAME together with the holdings records that reference
    it through controlfield 004, and the result is returned in the requested
    serialization.
:)
let $marcxml :=
    if ($marcxmluri ne "NONE") then
        fn:doc($marcxmluri)//marcxml:record
    else
        //marcxml:record

(: An empty $usebnodes is treated as "false". :)
let $usebnodes := if ($usebnodes = "") then "false" else $usebnodes

let $resources :=
    (: Only bibliographic records (or records without @type) drive the conversion. :)
    for $r in $marcxml[@type="Bibliographic" or fn:not(@type)]
    let $controlnum := xs:string($r/marcxml:controlfield[@tag eq "001"][1])
    (:
        Holdings are the records whose 004 equals this record's 001.
        A general comparison on the 004 node(s) is used on purpose: a record with
        no 004 never matches (even when $controlnum is empty), and a record with
        a repeated 004 does not raise a cardinality error.
    :)
    let $holds := $marcxml[marcxml:controlfield[@tag eq "004"] = $controlnum]
    let $httpuri := fn:concat($baseuri , $controlnum)
    let $recordset := element marcxml:collection { $r, $holds }
    let $bibframe := marcbib2bibframe:marcbib2bibframe($recordset, $httpuri)
    (: Keep only named children of the returned wrapper. :)
    return $bibframe/child::node()[fn:name()]

let $rdfxml-raw :=
    element rdf:RDF {
        $resources
    }

(: Every serialization except "rdfxml-raw" works on the flattened RDF/XML. :)
let $rdfxml :=
    if ( $serialization ne "rdfxml-raw" ) then
        RDFXMLnested2flat:RDFXMLnested2flat($rdfxml-raw, $baseuri, $usebnodes)
    else
        $rdfxml-raw

let $response :=
    if ($serialization eq "ntriples") then
        rdfxml2nt:rdfxml2ntriples($rdfxml)
    else if ($serialization eq "json") then
        rdfxml2json:rdfxml2json($rdfxml)
    else if ($serialization eq "exhibitJSON") then
        bfRDFXML2exhibitJSON:bfRDFXML2exhibitJSON($rdfxml, $baseuri)
    else
        $rdfxml

return $response
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
|
@@ -0,0 +1,359 @@
|
|
1
|
+
xquery version "3.0";
|
2
|
+
|
3
|
+
(:
|
4
|
+
: Module Name: MARC/XML BIB 2 BIBFRAME RDF using Zorba
|
5
|
+
:
|
6
|
+
: Module Version: 1.0
|
7
|
+
:
|
8
|
+
: Date: 2012 December 03
|
9
|
+
:
|
10
|
+
: Copyright: Public Domain
|
11
|
+
:
|
12
|
+
: Proprietary XQuery Extensions Used: Zorba (expath)
|
13
|
+
:
|
14
|
+
: Xquery Specification: January 2007
|
15
|
+
:
|
16
|
+
: Module Overview: Transforms MARC/XML Bibliographic records
|
17
|
+
: to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
|
18
|
+
: N-triples, or JSON.
|
19
|
+
:
|
20
|
+
: adding holdings capability; allow <marcxml:collection> with multiple records, some holdings, related to bibs on 004
|
21
|
+
:
|
22
|
+
: Run: zorba -i -q file:///location/of/zorba.xqy -e marcxmluri:="http://location/of/marcxml.xml" -e serialization:="rdfxml" -e baseuri:="http://your-base-uri/" -e usebnodes:="true"
|
23
|
+
: Run: zorba -i -q file:///location/of/zorba.xqy -e marcxmluri:="../location/of/marcxml.xml" -e serialization:="rdfxml" -e baseuri:="http://your-base-uri/" -e usebnodes:="true"
|
24
|
+
:)
|
25
|
+
|
26
|
+
(:~
|
27
|
+
: Transforms MARC/XML Bibliographic records
|
28
|
+
: to RDF conforming to the BIBFRAME model. Outputs RDF/XML,
|
29
|
+
: N-triples, or JSON.
|
30
|
+
:
|
31
|
+
: @author Nate Trail (ntra@loc.gov)
|
32
|
+
: @author Kevin Ford (kefo@loc.gov)
|
33
|
+
: @since December 17, 2014
|
34
|
+
: @version 1.0
|
35
|
+
:)
|
36
|
+
|
37
|
+
(: IMPORTED MODULES :)
|
38
|
+
import module namespace http = "http://zorba.io/modules/http-client";
|
39
|
+
import module namespace file = "http://expath.org/ns/file";
|
40
|
+
import module namespace parsexml = "http://zorba.io/modules/xml";
|
41
|
+
import schema namespace parseoptions = "http://zorba.io/modules/xml-options";
|
42
|
+
|
43
|
+
import module namespace marcbib2bibframe = "info:lc/id-modules/marcbib2bibframe#" at "../modules/module.MARCXMLBIB-2-BIBFRAME.xqy";
|
44
|
+
import module namespace rdfxml2nt = "info:lc/id-modules/rdfxml2nt#" at "../modules/module.RDFXML-2-Ntriples.xqy";
|
45
|
+
import module namespace rdfxml2json = "info:lc/id-modules/rdfxml2json#" at "../modules/module.RDFXML-2-JSON.xqy";
|
46
|
+
import module namespace bfRDFXML2exhibitJSON = "info:lc/bf-modules/bfRDFXML2exhibitJSON#" at "../modules/module.RDFXML-2-ExhibitJSON.xqy";
|
47
|
+
import module namespace RDFXMLnested2flat = "info:lc/bf-modules/RDFXMLnested2flat#" at "../modules/module.RDFXMLnested-2-flat.xqy";
|
48
|
+
|
49
|
+
(: NAMESPACES :)
|
50
|
+
declare namespace marcxml = "http://www.loc.gov/MARC21/slim";
|
51
|
+
declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
52
|
+
declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";
|
53
|
+
|
54
|
+
declare namespace bf = "http://bibframe.org/vocab/";
|
55
|
+
declare namespace madsrdf = "http://www.loc.gov/mads/rdf/v1#";
|
56
|
+
declare namespace relators = "http://id.loc.gov/vocabulary/relators/";
|
57
|
+
declare namespace identifiers = "http://id.loc.gov/vocabulary/identifiers/";
|
58
|
+
declare namespace notes = "http://id.loc.gov/vocabulary/notes/";
|
59
|
+
|
60
|
+
declare namespace an = "http://zorba.io/annotations";
|
61
|
+
declare namespace httpexpath = "http://expath.org/ns/http-client";
|
62
|
+
declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
|
63
|
+
|
64
|
+
declare namespace log = "info:lc/marc2bibframe/logging#";
|
65
|
+
declare namespace err = "http://www.w3.org/2005/xqt-errors";
|
66
|
+
declare namespace zerror = "http://zorba.io/errors";
|
67
|
+
|
68
|
+
(:~
|
69
|
+
: This variable is for the base uri for your Authorities/Concepts.
|
70
|
+
: It is the base URI for the rdf:about attribute.
|
71
|
+
:
|
72
|
+
:)
|
73
|
+
declare variable $baseuri as xs:string external := "http://example.org/";
|
74
|
+
|
75
|
+
(:~
|
76
|
+
: This variable determines whether bnodes should identify resources instead of
|
77
|
+
: http URIs, except for the "main" Work derived from each MARC record. At this time,
|
78
|
+
: the "main" Work must be identified by HTTP URI (using the $baseuri variable
|
79
|
+
: above).
|
80
|
+
:
|
81
|
+
:)
|
82
|
+
declare variable $usebnodes as xs:string external := "false";
|
83
|
+
|
84
|
+
(:~
|
85
|
+
: This variable is for the MARCXML location - externally defined.
|
86
|
+
:)
|
87
|
+
declare variable $marcxmluri as xs:string external;
|
88
|
+
|
89
|
+
(:~
|
90
|
+
: This variable is for desired serialization. Expected values are: rdfxml (default), rdfxml-raw, ntriples, json, exhibitJSON, log
|
91
|
+
:)
|
92
|
+
declare variable $serialization as xs:string external := "rdfxml";
|
93
|
+
|
94
|
+
(:~
|
95
|
+
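: If set to "true", the flattened RDF/XML is passed through local:resolve-labels, which tries to resolve resource labels to id.loc.gov authority URIs over HTTP. Not applied when serialization is rdfxml-raw. Default is "false".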
|
96
|
+
:)
|
97
|
+
declare variable $resolveLabelsWithID as xs:string external := "false";
|
98
|
+
|
99
|
+
(:~
|
100
|
+
: If set to "true" will write log file to directory.
|
101
|
+
:)
|
102
|
+
declare variable $writelog as xs:string external := "false";
|
103
|
+
|
104
|
+
(:~
|
105
|
+
: Directory for log files. MUST end with a slash.
|
106
|
+
:)
|
107
|
+
declare variable $logdir as xs:string external := "";
|
108
|
+
|
109
|
+
(:~
:   Sends an HTTP GET to the id.loc.gov label service for the given label,
:   asking the client not to follow redirects and to return status only.
:
:   The label is URI-encoded before it is placed in the request URL; the
:   scheme is inserted as given.
:
:   $label as xs:string is the label to look up
:   $scheme as xs:string is the path segment placed after /authorities/
:
:   Returns whatever http:send-request returns for the request.
:)
declare %an:sequential function local:http-get(
        $label as xs:string,
        $scheme as xs:string
    )
{
    let $encoded-label := fn:encode-for-uri($label)
    (: The request is described as JSON text and parsed before sending. :)
    let $request-json := fn:concat('{
"method": "GET",
"href": "http://id.loc.gov/authorities/', $scheme, '/label/', $encoded-label , '",
"options":
{
"status-only": true,
"override-media-type": "text/plain",
"follow-redirect": false,
"timeout": 5,
"user-agent": "MARC2BIBFRAME"
}
}')
    return http:send-request(jn:parse-json($request-json))
};
|
145
|
+
|
146
|
+
(:~
:   Outputs a resource, replacing the verbose bf:hasAuthority property
:   with a simple rdf:resource pointer.
:
:   The copy keeps the element's own qualified name, all of its attributes
:   and every child element except bf:hasAuthority, then appends a new
:   bf:hasAuthority whose rdf:resource is $authuri. Name tests are
:   namespace-aware, so the input may use any prefix for the bf namespace.
:
:   $r as element() is the resource
:   $authuri as xs:string is the authority URI
:)
declare %an:nondeterministic function local:generate-resource(
        $r as element(),
        $authuri as xs:string
    )
{
    (: fn:node-name keeps the QName itself, so no prefix lookup is needed here. :)
    element { fn:node-name($r) } {
        $r/@*,
        (: Compare by expanded name, not by the lexical "bf:" prefix. :)
        $r/*[fn:not(self::bf:hasAuthority)],
        element bf:hasAuthority {
            attribute rdf:resource { $authuri }
        }
    }
};
|
166
|
+
|
167
|
+
|
168
|
+
(:~
:   Tries to resolve labels to authority URIs.
:
:   Every child of $flatrdfxml whose local name matches
:   Person|Organization|Place|Meeting|Family|Topic|TemporalConcept is looked up
:   through local:http-get using its bf:authorizedAccessPoint (or, failing
:   that, its bf:label). Topic and TemporalConcept use the "subjects" scheme,
:   everything else uses "names". A 302 response is treated as a match and the
:   resource is rewritten with local:generate-resource using the X-URI response
:   header. If the first lookup does not match and the label ends with ".",
:   the lookup is retried once without the trailing ".".
:   Resources with no usable label, and all other children, are returned
:   unchanged without issuing a request.
:
:   $flatrdfxml as element(rdf:RDF) is the RDF/XML whose children are processed
:
:   Returns a new rdf:RDF element holding the processed children in order.
:)
declare %an:sequential function local:resolve-labels(
        $flatrdfxml as element(rdf:RDF)
    )
{
    let $resources :=
        for $r in $flatrdfxml/*
        let $n := fn:local-name($r)
        let $scheme :=
            if ( fn:matches($n, "Topic|TemporalConcept") ) then
                "subjects"
            else
                "names"
        return
            if ( fn:matches($n, "Person|Organization|Place|Meeting|Family|Topic|TemporalConcept") ) then
                let $label := ($r/bf:authorizedAccessPoint, $r/bf:label)[1]
                (: normalize-space turns a missing label into the empty string. :)
                let $label := fn:normalize-space(xs:string($label))
                return
                    if ($label eq "") then
                        (: Nothing to look up; do not spend a request on an empty label. :)
                        $r
                    else
                        let $req1 := local:http-get($label, $scheme)
                        return
                            if ($req1("status") eq 302) then
                                let $authuri := xs:string($req1("headers")("X-URI"))
                                return local:generate-resource($r, $authuri)
                            else if (
                                fn:ends-with($label, ".") and
                                fn:string-length($label) gt 1
                            ) then
                                (: Retry once without the trailing full stop. :)
                                let $l := fn:substring($label, 1, fn:string-length($label) - 1)
                                let $req2 := local:http-get($l, $scheme)
                                return
                                    if ($req2("status") eq 302) then
                                        let $authuri := xs:string($req2("headers")("X-URI"))
                                        return local:generate-resource($r, $authuri)
                                    else
                                        (: There was no match or some other message, keep moving :)
                                        $r
                            else
                                $r
            else
                $r

    return <rdf:RDF>{$resources}</rdf:RDF>
};
|
217
|
+
|
218
|
+
|
219
|
+
|
220
|
+
(:
    Main query body (Zorba driver).

    1. Obtains the MARC/XML from $marcxmluri: fetched over HTTP(S), taken
       literally after a "raw:" prefix, or read from the file system otherwise.
    2. Converts each bibliographic record, together with the holdings records that
       point at it through controlfield 004, to BIBFRAME RDF/XML. Conversion errors
       are caught per record and turned into log:error entries.
    3. Optionally flattens the RDF/XML and resolves labels, builds a log:log
       summary, optionally writes the log to $logdir, and returns the requested
       serialization.
:)
let $startDT := fn:current-dateTime()
(: Log file name: the start timestamp up to the first ".", with "-" and ":" removed. :)
let $logfilename := fn:replace(fn:substring-before(xs:string($startDT), "."), "-|:", "")
let $logfilename := fn:concat($logdir, $logfilename, '.log.xml')

let $marcxml :=
    if ( fn:starts-with($marcxmluri, "http://" ) or fn:starts-with($marcxmluri, "https://" ) ) then
        let $json := http:get($marcxmluri)
        return parsexml:parse($json("body")("content"), <parseoptions:options/>)
    else
        let $raw-data :=
            if ( fn:starts-with($marcxmluri, "raw:" ) ) then
                (: Everything after the 4-character "raw:" prefix is the XML itself. :)
                fn:substring($marcxmluri, 5)
            else
                file:read-text($marcxmluri)
        let $mxml := parsexml:parse(
                    $raw-data,
                    <parseoptions:options />
                )
        return $mxml
let $marcxml := $marcxml//marcxml:record

let $result :=
    (: Only bibliographic records (or records without @type) drive the conversion. :)
    for $r in $marcxml[@type="Bibliographic" or fn:not(@type)]
    let $controlnum := xs:string($r/marcxml:controlfield[@tag eq "001"][1])
    (:
        Holdings are the records whose 004 equals this record's 001.
        A general comparison on the 004 node(s) is used on purpose: a record with
        no 004 never matches (even when $controlnum is empty), and a record with
        a repeated 004 does not raise a cardinality error.
    :)
    let $holds := $marcxml[marcxml:controlfield[@tag eq "004"] = $controlnum]
    let $httpuri := fn:concat($baseuri , $controlnum)
    let $recordset := element marcxml:collection { $r, $holds }
    let $converted :=
        try {
            let $rdf := marcbib2bibframe:marcbib2bibframe($recordset, $httpuri)
            (: Keep only named children of the returned wrapper. :)
            let $o := $rdf/child::node()[fn:name()]
            let $logmsg :=
                element log:success {
                    attribute uri {$httpuri},
                    attribute datetime { fn:current-dateTime() }
                }
            return
                element result {
                    element logmsg {$logmsg},
                    element rdf {$o}
                }
        } catch * {
            (: Could get entire stack trace from Zorba, but omitting for now. :)
            let $stack1 := $zerror:stack-trace
            let $logmsg :=
                element log:error {
                    attribute uri {$httpuri},
                    attribute datetime { fn:current-dateTime() },
                    element log:error-details {
                        element log:error-xcode { xs:string($err:code) },
                        element log:error-description { xs:string($err:description) },
                        element log:error-file { xs:string($err:module) },
                        element log:error-line { xs:string($err:line-number) },
                        element log:error-column { xs:string($err:column-number) }
                        (: element log:error-stack { $stack1 } :)
                    },
                    (: $r is the MARC record being converted when the error was raised. :)
                    element log:offending-record {
                        $r
                    }
                }
            return
                element result {
                    element logmsg {$logmsg}
                }
        }
    return
        $converted

let $rdfxml-raw :=
    element rdf:RDF {
        $result//rdf/child::node()[fn:name()]
    }

(: Every serialization except "rdfxml-raw" works on the flattened RDF/XML. :)
let $rdfxml :=
    if ( $serialization ne "rdfxml-raw" ) then
        let $flatrdfxml := RDFXMLnested2flat:RDFXMLnested2flat($rdfxml-raw, $baseuri, $usebnodes)
        return
            if ($resolveLabelsWithID eq "true") then
                local:resolve-labels($flatrdfxml)
            else
                $flatrdfxml
    else
        $rdfxml-raw

let $endDT := fn:current-dateTime()
let $log :=
    element log:log {
        (: This driver runs under Zorba, so report that engine in the log. :)
        attribute engine {"Zorba"},
        attribute start {$startDT},
        attribute end {$endDT},
        attribute source {$marcxmluri},
        (: total-submitted counts every record in the input, holdings included. :)
        attribute total-submitted { fn:count($marcxml) },
        (: Count actual successes; holdings and skipped records are not conversions. :)
        attribute total-success { fn:count($result//logmsg/log:success) },
        attribute total-error { fn:count($result//logmsg/log:error) },
        $result//logmsg/log:*
    }

let $logwritten :=
    if ($writelog eq "true") then
        file:write-text($logfilename, serialize($log,
            <output:serialization-parameters>
                <output:indent value="yes"/>
                <output:method value="xml"/>
                <output:omit-xml-declaration value="no"/>
            </output:serialization-parameters>)
        )
    else
        ()

(:
    For now, not injecting notice about an error into the JSON outputs.
    There are a couple of ways to do it (one is a hack, the other is the right way)
    but 1) will it break anything and 2) is there a need?
:)
let $response :=
    if ($serialization eq "ntriples") then
        if (fn:count($result//logmsg/log:error) > 0) then
            fn:concat("# Errors encountered.  View 'log' for details.", fn:codepoints-to-string(10), rdfxml2nt:rdfxml2ntriples($rdfxml))
        else
            rdfxml2nt:rdfxml2ntriples($rdfxml)
    else if ($serialization eq "json") then
        rdfxml2json:rdfxml2json($rdfxml)
    else if ($serialization eq "exhibitJSON") then
        bfRDFXML2exhibitJSON:bfRDFXML2exhibitJSON($rdfxml, $baseuri)
    else if ($serialization eq "log") then
        $log
    else
        if (fn:count($result//logmsg/log:error) > 0) then
            element rdf:RDF {
                comment {"Errors encountered.  View 'log' for details."},
                $rdfxml/*
            }
        else
            $rdfxml

return $response
|
359
|
+
|