@lindas/trifid-plugin-ckan 7.0.1 → 7.0.2
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -1
- package/README.md +114 -114
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +8 -8
- package/src/ckan.js +51 -47
- package/src/index.js +1 -1
- package/src/query.js +1 -1
- package/src/xml.js +344 -344
package/src/xml.js
CHANGED
@@ -1,344 +1,344 @@
 // @ts-check
 
-import rdf from '@
-import prefixes, { shrink } from '@
+import rdf from '@lindas/env'
+import prefixes, { shrink } from '@lindas/prefixes'
 import { create as createXml } from 'xmlbuilder2'
 import { isBlankNode, isLiteral, isNamedNode } from 'is-graph-pointer'
 
 /**
  * Generate a CKAN-compatible XML representation of the dataset.
  *
  * @param {import('@rdfjs/types').DatasetCore<import('@rdfjs/types').Quad, import('@rdfjs/types').Quad>} dataset Dataset to convert.
  * @returns {string} XML representation of the dataset.
  */
 const toXML = (dataset) => {
   const pointer = rdf.clownface({ dataset: rdf.dataset(dataset) })
   const datasetsPointer = pointer.node(rdf.ns.dcat.Dataset).in(rdf.ns.rdf.type)
 
   const pf = Object.entries(prefixes)
     // `xml` prefix is reserved and must not be re-declared
     .filter(([prefix]) => prefix !== 'xml')
     .reduce((acc, [prefix, url]) => ({ ...acc, [`xmlns:${prefix}`]: url }), {})
 
   return createXml({
     version: '1.0',
     encoding: 'utf-8',
     namespaceAlias: {
       rdf: prefixes.rdf,
       dcat: prefixes.dcat,
       dcterms: prefixes.dcterms,
       vcard: prefixes.vcard,
       foaf: prefixes.foaf,
     },
   }, {
     'rdf:RDF': {
       '@': pf,
       'dcat:Catalog': {
         'dcat:dataset': datasetsPointer.map((dataset) => {
           // Verify that identifiers is CKAN-valid, ignore the dataset otherwise
           const identifiers = dataset.out(rdf.ns.dcterms.identifier)
           if (!identifiers.value) {
             // eslint-disable-next-line no-console
             console.error(`Ignoring dataset ${dataset.value} because it has no or multiple identifiers`)
             return null
           }
 
           // The initial query ensures that there is a creator
           const creators = dataset.out(rdf.ns.dcterms.creator)
           const creatorSlug = creators.values[0].split('/').slice(-1)[0]
           const identifier = identifiers.value.includes('@')
             ? identifiers.value
             : `${identifiers.value}@${creatorSlug}`
 
           // Ignore keywords without a language specified because CKAN rejects them
           // @ts-ignore
           const keywords = dataset.out(rdf.ns.dcat.keyword).filter(({ term: { language } }) => !!language)
 
           const copyright = dataset.out(rdf.ns.dcterms.rights).out(rdf.ns.schema.identifier)
 
           const legalBasisPointer = dataset.out(rdf.ns.dcterms.license)
           const legalBasis = legalBasisPointer.term
             ? {
               'rdf:Description': {
                 '@': { 'rdf:about': legalBasisPointer.value },
                 'rdfs:label': 'legal_basis',
               },
             }
             : null
 
           const workExampleDstributions = dataset.out(rdf.ns.schema.workExample)
             .filter(workExample => workExample.out(rdf.ns.schema.encodingFormat).terms.length > 0)
             .map(workExample => ({
               'dcat:Distribution': {
                 '@': { 'rdf:about': workExample.out(rdf.ns.schema.url).value },
                 'dcterms:issued': serializeTerm(dataset.out(rdf.ns.dcterms.issued)),
                 'dcat:mediaType': serializeTerm(workExample.out(rdf.ns.schema.encodingFormat)),
                 'dcat:accessURL': serializeTerm(workExample.out(rdf.ns.schema.url)),
                 'dcterms:title': serializeTerm(workExample.out(rdf.ns.schema.name)),
                 'dcterms:license': serializeTerm(copyright),
                 'dcterms:format': {
                   '@': {
                     'rdf:resource': distributionFormatFromEncoding(workExample.out(rdf.ns.schema.encodingFormat)),
                   },
                 },
               },
             }))
 
           const copiedDistributions = dataset.out(rdf.ns.dcat.distribution)
             .map((distribution, index) => ({
               'dcat:Distribution': {
                 '@': { 'rdf:about': `${dataset.value}/distribution/${index + 1}` },
                 'dcterms:issued': serializeTerm(dataset.out(rdf.ns.dcterms.issued)),
                 'dcterms:modified': serializeTerm(dataset.out(rdf.ns.dcterms.modified)),
                 'dcterms:license': serializeTerm(copyright),
                 ...serializeProperties(distribution),
               },
             }))
 
           const publishers = dataset.out(rdf.ns.dcterms.publisher)
             .map(publisher => {
               const attr = {}
               /** @type {string | string[]} */
               let name = publisher.value
 
               if (isNamedNode(publisher)) {
                 attr['rdf:about'] = publisher.value
                 if (publisher.out(rdf.ns.schema.name).values.length > 0) {
                   name = publisher.out(rdf.ns.schema.name).values
                 }
               }
 
               return {
                 'foaf:Organization': {
                   '@': attr,
                   'foaf:name': name,
                 },
               }
             })
 
           // Datasets contain a mix of legacy (DC) frequencies and new (EU) frequencies.
           // The query makes sure we get both legacy and new ones, we only
           // provide the new ones to CKAN, by converting legacy ones if needed.
           const euFreqPrefix = 'http://publications.europa.eu/resource/authority/frequency/'
           const accrualPeriodicity = dataset.out(rdf.ns.dcterms.accrualPeriodicity)
             .map((t) => {
               if (!t.term || !t.term.value) {
                 return t
               }
               // If the frequency is not a legacy frequency, it is returned unchanged.
               t.term.value = convertLegacyFrequency(t.term.value)
               return t
             })
             .filter(({ term }) => term.value.startsWith(euFreqPrefix))
 
           return {
             'dcat:Dataset': {
               '@': { 'rdf:about': dataset.value },
               'dcterms:identifier': { '#': identifier },
               'dcterms:title': serializeTerm(dataset.out(rdf.ns.dcterms.title)),
               'dcterms:description': serializeTerm(dataset.out(rdf.ns.dcterms.description)),
               'dcterms:issued': serializeTerm(dataset.out(rdf.ns.dcterms.issued)),
               'dcterms:modified': serializeTerm(dataset.out(rdf.ns.dcterms.modified)),
               'dcterms:publisher': publishers,
               'dcterms:creator': serializeTerm(creators),
               'dcat:contactPoint': serializeBlankNode(
                 dataset.out(rdf.ns.dcat.contactPoint),
                 [rdf.ns.vcard.Organization, rdf.ns.vcard.Individual],
               ),
               'dcat:theme': serializeTerm(dataset.out(rdf.ns.dcat.theme)),
               'dcterms:language': serializeTerm(dataset.out(rdf.ns.dcterms.language)),
               'dcterms:relation': [
                 legalBasis,
                 serializeTerm(dataset.out(rdf.ns.dcterms.relation), { properties: [rdf.ns.rdfs.label] }),
               ],
               'dcat:keyword': serializeTerm(keywords),
               'dcat:landingPage': serializeTerm(dataset.out(rdf.ns.dcat.landingPage)),
               'dcterms:spatial': serializeTerm(dataset.out(rdf.ns.dcterms.spatial)),
               'dcterms:coverage': serializeTerm(dataset.out(rdf.ns.dcterms.coverage)),
               'dcterms:temporal': serializeTerm(dataset.out(rdf.ns.dcterms.temporal)),
               // @ts-ignore
               'dcterms:accrualPeriodicity': serializeTerm(accrualPeriodicity),
               'dcat:distribution': [
                 ...workExampleDstributions,
                 ...copiedDistributions,
               ],
               'foaf:page': serializeTerm(dataset.out(rdf.ns.foaf.page)),
             },
           }
         }).filter(Boolean),
       },
     },
   }).doc().end({ prettyPrint: true }).concat('\n')
 }
 
 /**
  * Serialize a term.
  *
- * @param {import('clownface').MultiPointer | Array<import('clownface').GraphPointer>} pointer Pointer to serialize.
+ * @param {import('@lindas/clownface').MultiPointer | Array<import('@lindas/clownface').GraphPointer>} pointer Pointer to serialize.
  * @param {object} [options]
  * @param {import('@rdfjs/types').NamedNode[]} [options.properties]
  */
 const serializeTerm = (pointer, { properties = [] } = {}) => {
   return pointer.map((value) => {
     return serializeLiteral(value) || serializeNamedNode(value, properties) || serializeBlankNode(value) || {}
   })
 }
 
 /**
  * Serialize a literal.
  *
- * @param {import('clownface').MultiPointer} pointer Pointer to serialize.
+ * @param {import('@lindas/clownface').MultiPointer} pointer Pointer to serialize.
  * @return {Record<string, unknown>} Serialized literal.
  */
 const serializeLiteral = (pointer) => {
   if (!isLiteral(pointer)) return null
 
   const { term } = pointer
   const attrs = {}
 
   if (term.language) {
     attrs['xml:lang'] = term.language
   }
 
   if (term.datatype && !term.datatype.equals(rdf.ns.rdf.langString) && !term.datatype.equals(rdf.ns.xsd.string)) {
     attrs['rdf:datatype'] = term.datatype.value
   }
 
   return {
     '@': attrs,
     '#': term.value,
   }
 }
 
 /**
  * Serialize a named node.
  *
- * @param {import('clownface').MultiPointer} pointer Pointer to serialize.
+ * @param {import('@lindas/clownface').MultiPointer} pointer Pointer to serialize.
  * @param {import('@rdfjs/types').NamedNode[]} [properties]
  * @return {Record<string, unknown>} Serialized named node.
  */
 const serializeNamedNode = (pointer, properties = []) => {
   if (!isNamedNode(pointer)) return null
 
   const propertyMap = properties.reduce((acc, property) => ({
     ...acc,
     [shrink(property.value)]: serializeTerm(pointer.out(property)),
   }), {})
 
   if (Object.keys(propertyMap).length > 0) {
     return {
       'rdf:Description': {
         '@': { 'rdf:about': pointer.value },
         ...propertyMap,
       },
     }
   }
 
   return {
     '@': { 'rdf:resource': pointer.value },
   }
 }
 
 /**
  * Serialize a blank node.
  *
- * @param {import('clownface').MultiPointer} pointer Pointer to serialize.
+ * @param {import('@lindas/clownface').MultiPointer} pointer Pointer to serialize.
  * @param {Array<import('@rdfjs/types').NamedNode>} [allowedTypesArr] Allowed types for the blank node.
  * @return {Record<string, unknown>} Serialized blank node.
  */
 const serializeBlankNode = (pointer, allowedTypesArr = []) => {
   if (!isBlankNode(pointer)) return null
 
   const allowedTypes = rdf.termSet(allowedTypesArr)
   const types = pointer.out(rdf.ns.rdf.type).terms
   const type = types.find((term) => !allowedTypes.size || allowedTypes.has(term))
 
   if (!type) return {}
 
   return {
     [shrink(type.value)]: serializeProperties(pointer),
   }
 }
 
 function serializeProperties (pointer) {
   const properties = rdf.termSet([...pointer.dataset.match(pointer.term)]
     .map(({ predicate }) => predicate)
     .filter((term) => !term.equals(rdf.ns.rdf.type)))
 
   return [...properties].reduce((acc, property) =>
     ({ ...acc, [shrink(property.value)]: serializeTerm(pointer.out(property)) }), {})
 }
 
 /**
  * Convert encoding format to distribution format.
  *
- * @param {import('clownface').MultiPointer} encodingPointer Pointer to encoding format.
+ * @param {import('@lindas/clownface').MultiPointer} encodingPointer Pointer to encoding format.
  * @return {string} Distribution format.
  */
 const distributionFormatFromEncoding = (encodingPointer) => {
   const encoding = encodingPointer.values[0] || ''
 
   switch (encoding) {
     case 'text/html': {
       return 'http://publications.europa.eu/resource/authority/file-type/HTML'
     }
     case 'application/sparql-query': {
       return 'http://publications.europa.eu/resource/authority/file-type/SPARQLQ'
     }
     default: {
       return `https://www.iana.org/assignments/media-types/${encoding}`
     }
   }
 }
 
 /**
  * Convert legacy frequency to EU frequency if possible.
  * If the frequency is not a legacy frequency, it is returned unchanged.
  *
  * @param {string} frequency Frequency to convert.
  * @returns {string} Converted frequency.
  */
 export const convertLegacyFrequency = (frequency) => {
   const legacyFreqPrefix = 'http://purl.org/cld/freq'
   const euFreqPrefix = 'http://publications.europa.eu/resource/authority/frequency'
 
   switch (frequency) {
     case `${legacyFreqPrefix}/annual`:
       return `${euFreqPrefix}/ANNUAL`
     case `${legacyFreqPrefix}/semiannual`:
       return `${euFreqPrefix}/ANNUAL_2`
     case `${legacyFreqPrefix}/threeTimesAYear`:
       return `${euFreqPrefix}/ANNUAL_3`
     case `${legacyFreqPrefix}/biennial`:
       return `${euFreqPrefix}/BIENNIAL`
     case `${legacyFreqPrefix}/bimonthly`:
       return `${euFreqPrefix}/BIMONTHLY`
     case `${legacyFreqPrefix}/biweekly`:
       return `${euFreqPrefix}/BIWEEKLY`
     case `${legacyFreqPrefix}/continuous`:
       return `${euFreqPrefix}/CONT`
     case `${legacyFreqPrefix}/daily`:
       return `${euFreqPrefix}/DAILY`
     case `${legacyFreqPrefix}/irregular`:
       return `${euFreqPrefix}/IRREG`
     case `${legacyFreqPrefix}/monthly`:
       return `${euFreqPrefix}/MONTHLY`
     case `${legacyFreqPrefix}/semimonthly`:
       return `${euFreqPrefix}/MONTHLY_2`
     case `${legacyFreqPrefix}/threeTimesAMonth`:
       return `${euFreqPrefix}/MONTHLY_3`
     case `${legacyFreqPrefix}/quarterly`:
       return `${euFreqPrefix}/QUARTERLY`
     case `${legacyFreqPrefix}/triennial`:
       return `${euFreqPrefix}/TRIENNIAL`
     case `${legacyFreqPrefix}/weekly`:
       return `${euFreqPrefix}/WEEKLY`
     case `${legacyFreqPrefix}/semiweekly`:
       return `${euFreqPrefix}/WEEKLY_2`
     case `${legacyFreqPrefix}/threeTimesAWeek`:
       return `${euFreqPrefix}/WEEKLY_3`
   }
   return frequency
 }
 
 export { toXML }
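
A minimal sketch of how the two exports above, `toXML` and `convertLegacyFrequency`, might be called from a sibling module inside the package. It is illustrative only: it assumes `@lindas/env` exposes the standard RDF/JS data factory (`namedNode`, `literal`, `quad`) alongside the helpers already used above (`dataset`, `clownface`, `ns`), and the IRIs are invented for the example.

```js
// Illustrative only: a hypothetical caller of toXML and convertLegacyFrequency.
import rdf from '@lindas/env'
import { toXML, convertLegacyFrequency } from './xml.js'

// Invented IRIs for the sketch.
const ds = rdf.namedNode('https://example.org/dataset/example')
const creator = rdf.namedNode('https://example.org/org/example-office')

// The minimal triples toXML relies on: an rdf:type of dcat:Dataset,
// a dcterms:identifier and a dcterms:creator (whose last path segment
// becomes the `@<creator-slug>` suffix of the CKAN identifier).
const dataset = rdf.dataset([
  rdf.quad(ds, rdf.ns.rdf.type, rdf.ns.dcat.Dataset),
  rdf.quad(ds, rdf.ns.dcterms.identifier, rdf.literal('example')),
  rdf.quad(ds, rdf.ns.dcterms.creator, creator),
  rdf.quad(ds, rdf.ns.dcterms.title, rdf.literal('Example dataset', 'en')),
])

// Should print an XML document with one dcat:Dataset whose
// dcterms:identifier is "example@example-office".
console.log(toXML(dataset))

// Legacy Dublin Core frequencies map onto the EU authority list.
console.log(convertLegacyFrequency('http://purl.org/cld/freq/daily'))
// -> http://publications.europa.eu/resource/authority/frequency/DAILY
```

Identifiers that already contain an `@` are passed through unchanged, matching the identifier handling at the top of the `dcat:dataset` mapping above.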