ld4l-works_rdf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +25 -0
- data/.travis.yml +14 -0
- data/CHANGES.md +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +170 -0
- data/Rakefile +2 -0
- data/ld4l-works_rdf.gemspec +42 -0
- data/lib/ld4l/works_rdf.rb +131 -0
- data/lib/ld4l/works_rdf/configuration.rb +41 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_identifier.rb +17 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_instance.rb +23 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_organization.rb +16 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_person.rb +16 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_place.rb +16 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_provider.rb +18 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_title.rb +18 -0
- data/lib/ld4l/works_rdf/models/bibframe/bibframe_work.rb +16 -0
- data/lib/ld4l/works_rdf/models/bibo/bibo_book.rb +19 -0
- data/lib/ld4l/works_rdf/models/bibo/bibo_document.rb +14 -0
- data/lib/ld4l/works_rdf/models/bibo/vivo_authorship.rb +15 -0
- data/lib/ld4l/works_rdf/models/bibo/vivo_book.rb +18 -0
- data/lib/ld4l/works_rdf/models/generic_work.rb +13 -0
- data/lib/ld4l/works_rdf/models/schema/oclc_schema_book.rb +16 -0
- data/lib/ld4l/works_rdf/models/schema/schema_book.rb +23 -0
- data/lib/ld4l/works_rdf/models/schema/schema_person.rb +17 -0
- data/lib/ld4l/works_rdf/models/schema/schema_publisher.rb +15 -0
- data/lib/ld4l/works_rdf/models/work_metadata.rb +212 -0
- data/lib/ld4l/works_rdf/services/attempt_generic_metadata_extraction.rb +41 -0
- data/lib/ld4l/works_rdf/services/conversion_services/get_rdfxml_from_marcxml.rb +44 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.ErrorCodes.xqy +56 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXML-2-MADSRDF.xqy +1702 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXML-2-RecordInfoRDF.xqy +216 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXMLBIB-2-BIBFRAME.xqy +140 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MARCXMLBIB-BFUtils.xqy +3287 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-2-BIBFRAME-Shared.xqy +4112 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-Default-2-BF.xqy +61 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.MBIB-NotatedMusic-2-BF.xqy +105 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-ExhibitJSON.xqy +119 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-JSON.xqy +193 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXML-2-Ntriples.xqy +276 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/modules/module.RDFXMLnested-2-flat.xqy +380 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/ml.xqy +239 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/saxon.xqy +134 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba.xqy +359 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba2-0.xqy +249 -0
- data/lib/ld4l/works_rdf/services/conversion_services/marc2bibframe/xbin/zorba3-0.xqy +362 -0
- data/lib/ld4l/works_rdf/services/conversion_services/saxon/saxon9he.jar +0 -0
- data/lib/ld4l/works_rdf/services/get_metadata_from_marcxml_uri.rb +43 -0
- data/lib/ld4l/works_rdf/services/get_metadata_from_oclc_uri.rb +25 -0
- data/lib/ld4l/works_rdf/services/get_metadata_from_solr_query.rb +29 -0
- data/lib/ld4l/works_rdf/services/get_metadata_from_vivo_uri.rb +25 -0
- data/lib/ld4l/works_rdf/services/get_model_from_uri.rb +62 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_bibframe_models.rb +60 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_bibo_model.rb +42 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_generic_model.rb +41 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_oclc_model.rb +42 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_solr_doc.rb +67 -0
- data/lib/ld4l/works_rdf/services/metadata_services/get_metadata_from_vivo_model.rb +45 -0
- data/lib/ld4l/works_rdf/services/metadata_services/set_error_in_metadata.rb +27 -0
- data/lib/ld4l/works_rdf/services/model_services/populate_bibframe_models_from_repository.rb +46 -0
- data/lib/ld4l/works_rdf/services/model_services/populate_generic_model_from_repository.rb +30 -0
- data/lib/ld4l/works_rdf/services/model_services/populate_oclc_model_from_repository.rb +27 -0
- data/lib/ld4l/works_rdf/services/model_services/populate_vivo_model_from_repository.rb +27 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/get_marcxml_from_uri.rb +35 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/get_rdfxml_from_uri.rb +37 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/get_solr_results_from_solr_query.rb +35 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/get_turtle_from_uri.rb +37 -0
- data/lib/ld4l/works_rdf/services/negotiation_services/response_header.rb +51 -0
- data/lib/ld4l/works_rdf/services/repository_services/populate_graph_from_rdfxml.rb +22 -0
- data/lib/ld4l/works_rdf/services/repository_services/populate_graph_from_turtle.rb +21 -0
- data/lib/ld4l/works_rdf/services/repository_services/populate_repository_from_graph.rb +25 -0
- data/lib/ld4l/works_rdf/version.rb +5 -0
- data/lib/ld4l/works_rdf/vocab/bf.rb +29 -0
- data/lib/ld4l/works_rdf/vocab/bgn.rb +5 -0
- data/lib/ld4l/works_rdf/vocab/bibo.rb +10 -0
- data/lib/ld4l/works_rdf/vocab/library.rb +6 -0
- data/lib/ld4l/works_rdf/vocab/vitro.rb +5 -0
- data/lib/ld4l/works_rdf/vocab/vivo.rb +11 -0
- data/spec/ld4l/works_rdf/configuration_spec.rb +166 -0
- data/spec/ld4l/works_rdf/models/books/vivo_book_rdf_spec.rb +267 -0
- data/spec/ld4l/works_rdf/services/get_metadata_from_uri_spec.rb +39 -0
- data/spec/ld4l/works_rdf/services/get_model_from_uri_spec.rb +34 -0
- data/spec/ld4l/works_rdf_spec.rb +53 -0
- data/spec/spec_helper.rb +26 -0
- metadata +321 -0
@@ -0,0 +1,276 @@
|
|
1
|
+
xquery version "1.0";
|
2
|
+
|
3
|
+
(:
|
4
|
+
: Module Name: RDFXML 2 ntriples
|
5
|
+
:
|
6
|
+
: Module Version: 1.0
|
7
|
+
:
|
8
|
+
: Date: 2010 Oct 18
|
9
|
+
:
|
10
|
+
: Copyright: Public Domain
|
11
|
+
:
|
12
|
+
: Proprietary XQuery Extensions Used: none
|
13
|
+
:
|
14
|
+
: Xquery Specification: January 2007
|
15
|
+
:
|
16
|
+
:   Module Overview:     Takes RDF/XML and converts it to N-Triples.
|
17
|
+
: xdmp extension used in order to quote/escape otherwise valid
|
18
|
+
: XML.
|
19
|
+
:
|
20
|
+
: NB: This file has been modified to remove a ML dependency at
|
21
|
+
: around line 126 (xdmp:quote). Could be a problem for Literal types.
|
22
|
+
:)
|
23
|
+
|
24
|
+
(:~
|
25
|
+
: Takes RDF/XML and transforms to ntriples. xdmp extension
|
26
|
+
: used in order to quote/escape otherwise valid XML.
|
27
|
+
:
|
28
|
+
: @author Kevin Ford (kefo@loc.gov)
|
29
|
+
: @since October 18, 2010
|
30
|
+
: @version 1.0
|
31
|
+
:)
|
32
|
+
module namespace rdfxml2nt = "info:lc/id-modules/rdfxml2nt#";
|
33
|
+
declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
34
|
+
|
35
|
+
(:~
 : Entry point of the module: serialises an RDF/XML tree as N-Triples text.
 :
 : Every named child of the supplied node is handed to
 : rdfxml2nt:parse_class with an empty inherited subject, and the
 : resulting chunks are joined with a line feed.
 :
 : @param  $rdfxml node() expected to have the local name "RDF"
 : @return the N-Triples text as xs:string, or a fixed error message
 :         when the supplied node is not named "RDF"
 :)
declare function rdfxml2nt:rdfxml2ntriples($rdfxml as node()) as xs:string {
    let $is-rdf-root := fn:exists($rdfxml[1][fn:local-name() eq "RDF"])
    return
        if (fn:not($is-rdf-root)) then
            "Invalid source: RDF/XML should have a root node of RDF."
        else
            fn:string-join(
                (: only named nodes are resources; text nodes are skipped :)
                for $resource in $rdfxml/child::node()[fn:name()]
                return rdfxml2nt:parse_class($resource, ""),
                fn:codepoints-to-string(10)
            )
};
|
50
|
+
|
51
|
+
(:~
 : Serialises one RDF class (node) element and all of its properties.
 :
 : The subject is chosen in this order: a non-empty rdf:about, the
 : xml:base of the enclosing rdf:RDF when rdf:about is empty, xml:base
 : concatenated with rdf:ID, rdf:nodeID as a blank node, the subject
 : handed down by the calling property, and finally a generated blank node.
 :
 : NOTE(review): xml:base and rdf:ID are concatenated without a "#"
 : separator; confirm this is intended for the data being converted.
 :
 : @param  $node     node() the class element
 : @param  $uri_pass xs:string subject inherited from the property
 :                   evaluation, used when the element carries no
 :                   identifier of its own ("" when there is none)
 : @return the rdf:type triple (omitted for rdf:Description) followed by
 :         the triples of every property, as a single string
 :)
declare function rdfxml2nt:parse_class(
    $node as node(),
    $uri_pass as xs:string
) as item()* {
    let $newline := fn:codepoints-to-string(10)
    let $type-predicate := " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <"
    let $base := $node/ancestor::rdf:RDF[1]/@xml:base

    let $subject :=
        if ($node/@rdf:about ne "") then
            fn:concat("<", fn:data($node/@rdf:about), ">")
        else if ($node/@rdf:about eq "") then
            fn:concat("<", fn:data($base), ">")
        else if ($node/@rdf:ID ne "" and $base) then
            fn:concat("<", fn:data($base), $node/@rdf:ID, ">")
        else if ($node/@rdf:nodeID) then
            fn:concat("_:", fn:data($node/@rdf:nodeID))
        else if ($uri_pass ne "") then
            $uri_pass
        else
            rdfxml2nt:return_bnode($node)

    let $local := fn:local-name($node)
    let $own-ns := fn:namespace-uri($node)
    (: fall back to the parent's namespace when the element has none :)
    let $class-ns :=
        if ($own-ns) then $own-ns
        else fn:namespace-uri($node/parent::node())

    let $type-triple :=
        if ($local eq "Description" or fn:not($local) or fn:not($class-ns)) then
            ""
        else
            fn:concat($subject, $type-predicate, $class-ns, $local, "> . ", $newline)

    (: NOTE(review): the predicate below drops children that themselves
       contain an rdf:type child, not children named rdf:type :)
    let $property-triples :=
        for $property in $node/child::node()[fn:name()][fn:not(rdf:type)]
        return rdfxml2nt:parse_property($property, $subject)

    return fn:concat($type-triple, fn:string-join($property-triples, ""))
};
|
97
|
+
|
98
|
+
(:~
 : Serialises one RDF property element as a triple and then descends
 : into any nested class elements or rdf:parseType="Collection" content.
 :
 : The object is chosen in this order: rdf:resource, a generated blank
 : node for a collection head, the rdf:nodeID or rdf:about of the first
 : child, a quoted literal for rdf:parseType="Literal", a generated blank
 : node for an unidentified nested element, and otherwise a plain literal
 : with an optional language tag or datatype.
 :
 : @param  $node node() the property element
 : @param  $uri  xs:string subject handed down from the class evaluation
 : @return the triple for this property followed by the triples of any
 :         nested resources, as a single string
 :)
declare function rdfxml2nt:parse_property(
    $node as node(),
    $uri as xs:string
) as item()* {
    let $newline := fn:codepoints-to-string(10)
    let $first-child := $node/child::node()[1]
    let $element-children := $node/child::node()[fn:name()]
    let $is-collection := fn:exists($node[@rdf:parseType eq "Collection"])

    let $resource_string :=
        if ($node/@rdf:resource) then
            fn:concat("<", fn:data($node/@rdf:resource), ">")
        else if ($is-collection and fn:not($node/@rdf:nodeID)) then
            rdfxml2nt:return_bnode($first-child)
        else if ($first-child/@rdf:nodeID) then
            fn:concat("_:", fn:data($first-child/@rdf:nodeID))
        else if ($first-child/@rdf:about) then
            fn:concat("<", fn:data($first-child/@rdf:about), ">")
        else if ($node[@rdf:parseType eq "Literal"]) then
            (: The text nodes are joined first: fn:replace accepts at most
               one string, so passing several text nodes directly raised a
               type error. Zero or one text node behaves as before. :)
            fn:concat('"',
                fn:replace(
                    fn:replace(
                        fn:replace(
                            fn:string-join($node/child::node()/text(), ""),
                            '"',
                            '\\"'
                        ),
                        '\n',
                        '\\r\\n'
                    ),
                    "\t",
                    '\\t'
                ),
                '"^^<http://www.w3.org/2000/01/rdf-schema#Literal>')
        else if (fn:local-name($element-children[1]) ne "") then
            rdfxml2nt:return_bnode($element-children[1])
        else
            fn:concat('"', rdfxml2nt:clean_string(xs:string($node)), '"',
                if ($node/@xml:lang) then
                    fn:concat('@', xs:string($node/@xml:lang))
                else if ($node/@rdf:datatype) then
                    fn:concat('^^<', xs:string($node/@rdf:datatype), '>')
                else ()
            )

    let $triple :=
        fn:concat($uri, " <", fn:namespace-uri($node), fn:local-name($node), "> ",
                  $resource_string, " . ", $newline)

    return
        if ($element-children and $is-collection) then
            (: the list cells are chained starting from the first item :)
            let $cells := rdfxml2nt:parse_collection($element-children[1], $resource_string)
            return fn:concat($triple, fn:string-join($cells, ''))
        else if ($element-children and fn:not($node/@rdf:parseType)) then
            (: nested class elements inherit the object as their subject :)
            let $classes :=
                for $class in $element-children
                return rdfxml2nt:parse_class($class, $resource_string)
            return fn:concat($triple, fn:string-join($classes, ""))
        else
            $triple
};
|
165
|
+
|
166
|
+
(:~
 : Serialises one item of an rdf:parseType="Collection" property as an
 : rdf:first / rdf:rest list cell, then recurses over the following
 : sibling items until the list is closed with rdf:nil.
 :
 : @param  $node node() the current collection item
 : @param  $uri  xs:string identifier of the list cell holding this item
 : @return the rdf:first and rdf:rest triples, the triples of the item
 :         itself and those of all following items, as a single string
 :)
declare function rdfxml2nt:parse_collection(
    $node as node(),
    $uri as xs:string
) as item()* {
    let $newline := fn:codepoints-to-string(10)
    let $next-item := $node/following-sibling::node()[fn:name()][1]
    let $has-next := fn:exists($next-item)

    let $item-ref :=
        if ($node/@rdf:resource) then
            fn:concat("<", fn:data($node/@rdf:resource), ">")
        else if ($node/@rdf:about) then
            fn:concat("<", fn:data($node/@rdf:about), ">")
        else if ($node/@rdf:nodeID) then
            fn:concat("_:", fn:data($node/@rdf:nodeID))
        else
            rdfxml2nt:return_bnode($node/child::node()[fn:name()][1])

    (: the cell of the next item, or rdf:nil when this item is the last :)
    let $tail :=
        if ($has-next) then
            rdfxml2nt:return_bnode_collection($next-item)
        else
            "<http://www.w3.org/1999/02/22-rdf-syntax-ns#nil>"

    let $first-triple :=
        fn:concat($uri, " <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> ",
                  $item-ref, " . ", $newline)
    let $rest-triple :=
        fn:concat($uri, " <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> ",
                  $tail, " . ", $newline)
    let $item-triples := rdfxml2nt:parse_class($node, $item-ref)

    return
        if ($has-next) then
            fn:concat($first-triple, $rest-triple, $item-triples,
                      fn:string-join(rdfxml2nt:parse_collection($next-item, $tail), ""))
        else
            fn:concat($first-triple, $rest-triple, $item-triples)
};
|
212
|
+
|
213
|
+
(:~
 : Helper function that builds a blank-node label for a node.
 :
 : The label is "_:bnode", a number derived from the node's position in
 : the tree (count of ancestors-or-self plus count of preceding nodes),
 : and a suffix derived from the rdf:about of the first element under
 : rdf:RDF.
 :
 : The suffix is empty when no such rdf:about exists. The lookup result
 : is wrapped in fn:string because rdfxml2nt:return_uri4bnode requires
 : exactly one xs:string; passing an empty sequence, for example when the
 : first child of rdf:RDF was a whitespace text node, raised a type error.
 :
 : @param  $node node()
 : @return the blank-node label as xs:string
 :)
declare function rdfxml2nt:return_bnode($node as node()) as xs:string
{
    let $root-about :=
        ($node/ancestor-or-self::node()[fn:name() = 'rdf:RDF']/*[1]/@rdf:about)[1]
    let $uri4bnode := rdfxml2nt:return_uri4bnode(fn:string($root-about))
    let $unique_num :=
        xs:integer(fn:count($node/ancestor-or-self::node()) + fn:count($node/preceding::node()))
    return fn:concat("_:bnode", xs:string($unique_num), $uri4bnode)
};
|
225
|
+
|
226
|
+
(:~
 : Helper function that builds a blank-node label for a collection
 : list cell.
 :
 : The label is "_:bnode0" followed by a number derived from the node's
 : position in the tree (count of ancestors-or-self plus count of
 : preceding nodes). No URI-derived suffix is appended, so the unused
 : lookup of one, which could raise a type error, has been dropped.
 :
 : @param  $node node()
 : @return the blank-node label as xs:string
 :)
declare function rdfxml2nt:return_bnode_collection($node as node()) as xs:string {
    let $unique_num :=
        xs:integer(fn:count($node/ancestor-or-self::node()) + fn:count($node/preceding::node()))
    return fn:concat("_:bnode", "0", xs:string($unique_num))
};
|
237
|
+
|
238
|
+
(:~
 : Blank-node distinction: condenses a URI into a short suffix meant to
 : make generated blank-node labels less likely to collide.
 :
 : The URI is split on "/", every segment containing ".", ":" or "#" is
 : discarded, and the remaining segments are concatenated.
 :
 : @param  $uri xs:string
 : @return the condensed suffix as xs:string
 :)
declare function rdfxml2nt:return_uri4bnode($uri as xs:string) as xs:string {
    fn:string-join(
        fn:tokenize($uri, '/')[fn:not(fn:matches(., '\.|:|#'))],
        ''
    )
};
|
256
|
+
|
257
|
+
|
258
|
+
(:~
 : Escapes a literal value for N-Triples output.
 :
 : Applied in order: backslashes are doubled, double quotes are
 : backslash-escaped, line feeds become the text \r\n, the right single
 : quotation mark becomes an apostrophe, curly double quotes become an
 : escaped straight quote, and "ā" becomes the text \u0101.
 :
 : @param  $str string to clean
 : @return xs:string
 :)
declare function rdfxml2nt:clean_string($str as xs:string) as xs:string
{
    let $backslashes-done := fn:replace($str, '\\', '\\\\')
    let $quotes-done := fn:replace($backslashes-done, '"', '\\"')
    let $newlines-done := fn:replace($quotes-done, "\n", "\\r\\n")
    let $apostrophes-done := fn:replace($newlines-done, "’", "'")
    let $curly-quotes-done := fn:replace($apostrophes-done, '“|”', '\\"')
    return fn:replace($curly-quotes-done, 'ā', '\\u0101')
};
|
274
|
+
|
275
|
+
|
276
|
+
|
@@ -0,0 +1,380 @@
|
|
1
|
+
xquery version "1.0";
|
2
|
+
|
3
|
+
(:
|
4
|
+
: Module Name: BIBFRAME RDF/XML Nested (RAW) 2 RDF/XML Flat (Condensed)
|
5
|
+
:
|
6
|
+
: Module Version: 1.0
|
7
|
+
:
|
8
|
+
: Date: 2013 10 Jan
|
9
|
+
:
|
10
|
+
: Copyright: Public Domain
|
11
|
+
:
|
12
|
+
: Proprietary XQuery Extensions Used: none
|
13
|
+
:
|
14
|
+
: Xquery Specification: January 2007
|
15
|
+
:
|
16
|
+
: Module Overview: Takes BIBFRAME RDF/XML, which can be
|
17
|
+
: deeply nested, and flattens it by assigning each resource
|
18
|
+
: a URI. This should really be generalized to RDF, i.e.
|
19
|
+
: not BF specific.
|
20
|
+
:
|
21
|
+
:)
|
22
|
+
|
23
|
+
(:~
|
24
|
+
: Takes BIBFRAME RDF/XML, which can be
|
25
|
+
: deeply nested, and flattens it by assigning each resource
|
26
|
+
: a URI.
|
27
|
+
:
|
28
|
+
: @author Kevin Ford (kefo@loc.gov)
|
29
|
+
: @since January 10, 2013
|
30
|
+
: @update May 23, 2013
|
31
|
+
: @version 1.0
|
32
|
+
:)
|
33
|
+
|
34
|
+
|
35
|
+
module namespace RDFXMLnested2flat = 'info:lc/bf-modules/RDFXMLnested2flat#';
|
36
|
+
|
37
|
+
declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
38
|
+
declare namespace rdfs = "http://www.w3.org/2000/01/rdf-schema#";
|
39
|
+
|
40
|
+
declare namespace bf = "http://bibframe.org/vocab/";
|
41
|
+
declare namespace madsrdf = "http://www.loc.gov/mads/rdf/v1#";
|
42
|
+
declare namespace relators = "http://id.loc.gov/vocabulary/relators/";
|
43
|
+
|
44
|
+
|
45
|
+
(: Local names of the classes that RDFXMLnested2flat:identifyClasses
   passes through unchanged instead of assigning them an identifier. :)
declare variable $RDFXMLnested2flat:resourcesToIgnore :=
    <ignore><class>Provider</class><class>Identifier</class><class>Authority</class></ignore>;
|
51
|
+
|
52
|
+
(: Inverse-relation rules read by RDFXMLnested2flat:insertInverses.
   Each rule names a source class, a target class, the property looked
   for on the source (lookForOnSource) and the property entered on the
   target in its place (enterOnTarget). :)
declare variable $RDFXMLnested2flat:inverses :=
    <inverses>
        <inverse sourceResource="bf:Work" targetResource="bf:Annotation"><replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:annotates"/></inverse>
        <inverse sourceResource="bf:Work" targetResource="bf:Description"><replace lookForOnSource="bf:describedIn" enterOnTarget="bf:descriptionOf"/></inverse>
        <inverse sourceResource="bf:Work" targetResource="bf:Description"><replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:annotates"/></inverse>
        <inverse sourceResource="bf:Work" targetResource="bf:Summary"><replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:summaryOf"/></inverse>
        <inverse sourceResource="bf:Work" targetResource="bf:Review"><replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:annotates"/></inverse>
        <inverse sourceResource="bf:Work" targetResource="bf:Review"><replace lookForOnSource="bf:reviewedIn" enterOnTarget="bf:reviews"/></inverse>
        <inverse sourceResource="bf:Work" targetResource="bf:TableOfContents"><replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:tableOfContentsFor"/></inverse>
        <inverse sourceResource="bf:Instance" targetResource="bf:HeldMaterial"><replace lookForOnSource="bf:heldMaterial" enterOnTarget="bf:holdingFor"/></inverse>
        <inverse sourceResource="bf:HeldMaterial" targetResource="bf:HeldItem"><replace lookForOnSource="bf:heldItem" enterOnTarget="bf:componentOf"/></inverse>
        <inverse sourceResource="bf:Instance" targetResource="bf:HeldItem"><replace lookForOnSource="bf:heldItem" enterOnTarget="bf:holdingFor"/></inverse>
        <!--old :-->
        <inverse sourceResource="bf:Instance" targetResource="bf:Holding"><replace lookForOnSource="bf:hasHolding" enterOnTarget="bf:holds"/></inverse>
        <inverse sourceResource="bf:Instance" targetResource="bf:Annotation"><replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:annotates"/></inverse>
        <inverse sourceResource="bf:Person" targetResource="bf:Annotation"><replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:annotates"/></inverse>
        <inverse sourceResource="bf:Work" targetResource="bf:Instance"><replace lookForOnSource="bf:hasInstance" enterOnTarget="bf:instanceOf"/></inverse>
    </inverses>;
|
100
|
+
|
101
|
+
(:~
 : This is the main function. Takes BIBFRAME RDF/XML, which can be
 : deeply nested, and flattens it: resources are identified, extracted,
 : un-nested and given inverse relations, then written out under a
 : fresh rdf:RDF element grouped by class. bf:Work resources come first,
 : ordered by rdf:about, then the other well-known classes, then every
 : remaining bf:* resource.
 :
 : The final catch-all group is selected by exact local name. It was
 : previously selected with an unanchored regular expression, which
 : silently dropped any class whose name merely contained one of the
 : well-known names.
 :
 : @param  $rdfxml    element(rdf:RDF) is the RDF/XML
 : @param  $baseuri   xs:string is the base uri for identifiers
 : @param  $usebnodes xs:string, passed through to identifyClasses and
 :                    insertInverses; "true" selects rdf:nodeID
 :                    identifiers in identifyClasses
 : @return element rdf:RDF
 :)
declare function RDFXMLnested2flat:RDFXMLnested2flat
    (
        $rdfxml as element(rdf:RDF),
        $baseuri as xs:string,
        $usebnodes as xs:string
    )
    as element(rdf:RDF)
{
    (: classes that get their own explicit slot in the output below :)
    let $grouped-classes :=
        ("Work", "Instance", "Authority", "Annotation", "Holding",
         "HeldMaterial", "HeldItem", "Summary", "Review", "TableOfContents")

    let $resources := RDFXMLnested2flat:identifyClasses($rdfxml, $baseuri, $usebnodes, 0)
    let $resources := RDFXMLnested2flat:flatten($resources)
    let $resources := RDFXMLnested2flat:removeNesting($resources)
    let $resources := RDFXMLnested2flat:insertInverses($resources, $usebnodes)
    return
        (: inline element rather than a computed one, so the namespace
           declarations written to the output are controlled here :)
        <rdf:RDF
            xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
            xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
            xmlns:bf="http://bibframe.org/vocab/"
            xmlns:madsrdf="http://www.loc.gov/mads/rdf/v1#"
            xmlns:relators="http://id.loc.gov/vocabulary/relators/"
        >
        {
            $rdfxml/@*,
            for $w in $resources/self::bf:Work
            order by $w/@rdf:about
            return $w,
            $resources/self::bf:Instance,
            $resources/self::bf:Authority,
            $resources/self::bf:Annotation,
            $resources/self::bf:HeldMaterial,
            $resources/self::bf:HeldItem,
            $resources/self::bf:Holding,
            $resources/self::bf:Summary,
            $resources/self::bf:Review,
            $resources/self::bf:TableOfContents,
            $resources/self::bf:*[fn:not(fn:local-name() = $grouped-classes)]
        }
        </rdf:RDF>
};
|
159
|
+
|
160
|
+
|
161
|
+
(:~
 : Flattens the RDF/XML by pulling every identified resource out of the
 : nested structure: for each rdf:about or rdf:nodeID attribute found on
 : the supplied elements or anywhere below them, the element that
 : carries the attribute is returned.
 :
 : @param  $resources element()* are the resources.
 : @return element()* the elements carrying an identifier attribute
 :)
declare function RDFXMLnested2flat:flatten($resources as element()*)
    as element()*
{
    for $identifier in ($resources//@rdf:about | $resources//@rdf:nodeID)
    return $identifier/..
};
|
179
|
+
|
180
|
+
|
181
|
+
(:~
 : Identify resources.
 : This maintains the nested structure. It is called recursively.
 :
 : Every child whose local name looks like a class name (an upper-case
 : letter followed by lower-case letters) is rebuilt with an identifier:
 : its existing rdf:about, or a generated rdf:nodeID when $usebnodes is
 : "true", or otherwise a generated rdf:about. Classes listed in
 : $RDFXMLnested2flat:resourcesToIgnore are returned unchanged after the
 : identified ones.
 :
 : Ignored classes are matched by exact local name. A substring test
 : against the joined list was used before, which would also have
 : skipped a class such as "Author" because it occurs in "Authority".
 :
 : @param  $rdfxml    element(rdf:RDF) is the RDF/XML
 : @param  $baseuri   xs:string is the base uri for identifiers
 : @param  $usebnodes xs:string, "true" to generate rdf:nodeID
 :                    identifiers instead of rdf:about
 : @param  $place     xs:integer offset added to the position when
 :                    generating identifiers
 : @return element()* the identified resources followed by the skipped ones
 :)
declare function RDFXMLnested2flat:identifyClasses
    (
        $rdfxml as element(rdf:RDF),
        $baseuri as xs:string,
        $usebnodes as xs:string,
        $place as xs:integer
    )
    as element()*
{
    let $class-name-pattern := "^([A-Z])([a-z]+)"
    let $ignored := $RDFXMLnested2flat:resourcesToIgnore/class

    let $resources := $rdfxml/child::node()[fn:matches(fn:local-name(), $class-name-pattern)]

    let $identified-resources :=
        (: $pos is the position among all candidate resources, taken
           before the where clause filters out the ignored ones :)
        for $r at $pos in $resources
        let $n := fn:lower-case(fn:local-name($r))
        let $baseuri-new :=
            if ($r/@rdf:about) then
                xs:string($r/@rdf:about)
            else if ($usebnodes eq "true") then
                "b"
            else
                $baseuri
        where fn:not(fn:local-name($r) = $ignored)
        return
            element { fn:name($r) } {
                (
                    if ($r/@rdf:about) then
                        $r/@rdf:about
                    else if ($usebnodes eq "true") then
                        attribute rdf:nodeID { fn:concat($baseuri-new, $n, ($pos + $place)) }
                    else
                        attribute rdf:about { fn:concat($baseuri-new, $n, ($pos + $place)) }
                ),
                for $p at $spot in $r/*
                let $classes := $p/child::node()[fn:matches(fn:local-name(), $class-name-pattern)]
                return
                    if ($classes) then
                        (: the property holds nested classes: identify them
                           too, using this resource's identifier as base :)
                        element { fn:name($p) } {
                            $p/@*,
                            RDFXMLnested2flat:identifyClasses(
                                <rdf:RDF>{$classes}</rdf:RDF>,
                                $baseuri-new,
                                $usebnodes,
                                ($pos + $spot + $place)
                            )
                        }
                    else
                        $p
            }

    let $skipped-resources := $resources[fn:local-name() = $ignored]

    return ($identified-resources, $skipped-resources)
};
|
245
|
+
|
246
|
+
|
247
|
+
(:
|
248
|
+
declare variable $RDFXMLnested2flat:inverses :=
|
249
|
+
<inverses>
|
250
|
+
<inverse sourceResource="bf:Work" targetResource="bf:Annotation">
|
251
|
+
<replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:annotates" />
|
252
|
+
</inverse>
|
253
|
+
<inverse sourceResource="bf:Instance" targetResource="bf:Holding">
|
254
|
+
<replace lookForOnSource="bf:hasHolding" enterOnTarget="bf:holds" />
|
255
|
+
</inverse>
|
256
|
+
<inverse sourceResource="bf:Instance" targetResource="bf:Annotation">
|
257
|
+
<replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:annotates" />
|
258
|
+
</inverse>
|
259
|
+
<inverse sourceResource="bf:Person" targetResource="bf:Annotation">
|
260
|
+
<replace lookForOnSource="bf:hasAnnotation" enterOnTarget="bf:annotates" />
|
261
|
+
</inverse>
|
262
|
+
<inverse sourceResource="bf:Work" targetResource="bf:Instance">
|
263
|
+
<replace lookForOnSource="bf:hasInstance" enterOnTarget="bf:instanceOf" />
|
264
|
+
</inverse>
|
265
|
+
</inverses>;
|
266
|
+
:)
|
267
|
+
(:~
 : Insert inverse relations.
 :
 : Using the rules in $RDFXMLnested2flat:inverses, every property named
 : as lookForOnSource or enterOnTarget is removed from all resources.
 : Each resource whose class is a rule target then receives one
 : enterOnTarget property per distinct source resource that pointed at
 : it with the matching lookForOnSource property.
 :
 : Changes from the earlier behaviour:
 :   - Sources that reference the target through rdf:nodeID are now
 :     found. Only rdf:resource was compared before, so blank-node
 :     output lost the relation in both directions.
 :   - The inserted reference is rdf:resource when the source has an
 :     rdf:about and rdf:nodeID when it only has an rdf:nodeID, so a URI
 :     is never written into rdf:nodeID.
 :   - Property and class names are compared exactly rather than by
 :     regular-expression or substring containment.
 :
 : @param  $resources element()* are the resources.
 : @param  $usebnodes xs:string, kept for interface compatibility; the
 :                    kind of reference is derived from the identifier
 :                    attribute present on each source resource
 : @return element()* the rule targets followed by all other resources
 :)
declare function RDFXMLnested2flat:insertInverses(
        $resources as element()*,
        $usebnodes as xs:string)
    as element()*
{
    let $rules := $RDFXMLnested2flat:inverses/inverse
    let $target-names := fn:distinct-values($rules/@targetResource)
    let $managed-props :=
        fn:distinct-values(($rules/replace/@enterOnTarget, $rules/replace/@lookForOnSource))

    let $modified-targets :=
        for $r in $resources
        let $n := fn:name($r)
        let $target-is-uri := fn:exists($r/@rdf:about)
        let $id := fn:string(($r/@rdf:about, $r/@rdf:nodeID)[1])
        where $n = $target-names
        return
            element { fn:name($r) } {
                $r/@*,

                $r/*[fn:not(fn:name() = $managed-props)],

                for $rule in $rules[@targetResource = $n]
                let $replace := $rule/replace
                (: sources of the rule's class that point at this target
                   with the looked-for property :)
                let $sources :=
                    $resources
                        [fn:name() = $rule/@sourceResource]
                        [*[fn:name() = $replace/@lookForOnSource]
                          [if ($target-is-uri)
                           then @rdf:resource = $id
                           else @rdf:nodeID = $id]]
                return (
                    for $about in fn:distinct-values($sources/@rdf:about)
                    return
                        element { xs:string($replace/@enterOnTarget) } {
                            attribute rdf:resource { xs:string($about) }
                        },
                    for $node-id in fn:distinct-values($sources[fn:not(@rdf:about)]/@rdf:nodeID)
                    return
                        element { xs:string($replace/@enterOnTarget) } {
                            attribute rdf:nodeID { xs:string($node-id) }
                        }
                )
            }

    (: Resources that are not rule targets still lose the managed
       properties; they may have been sources above. :)
    let $unmodified-resources :=
        for $r in $resources
        where fn:not(fn:name($r) = $target-names)
        return
            element { fn:name($r) } {
                $r/@*,
                $r/*[fn:not(fn:name() = $managed-props)]
            }

    return ($modified-targets, $unmodified-resources)
};
|
341
|
+
|
342
|
+
(:~
 : Remove nesting from extracted, identified resources.
 :
 : Each property that contains identified child resources is replaced
 : by a reference: rdf:resource for a child with rdf:about, rdf:nodeID
 : for a child that only has rdf:nodeID. Properties without identified
 : children are copied unchanged.
 :
 : One reference element is emitted per nested resource. A property
 : holding several identified resources previously raised a type error,
 : because their identifiers were passed to xs:string as a sequence.
 :
 : @param  $resources element()* are the resources.
 : @return element()* the resources with nested resources replaced by
 :         references
 :)
declare function RDFXMLnested2flat:removeNesting(
        $resources as element()*
    ) as element()*
{
    for $r in $resources
    return
        element { fn:name($r) } {
            $r/@*,

            for $p in $r/*
            let $nested := $p/child::node()[@rdf:about or @rdf:nodeID]
            return
                if ($nested) then
                    for $child in $nested
                    return
                        element { fn:name($p) } {
                            (: rdf:about wins when a child carries both :)
                            if ($child/@rdf:about) then
                                attribute rdf:resource { xs:string($child/@rdf:about) }
                            else
                                attribute rdf:nodeID { xs:string($child/@rdf:nodeID) }
                        }
                else
                    $p
        }
};
|