@netwerk-digitaal-erfgoed/network-of-terms-query 6.2.7 → 6.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +3 -0
- package/{build → dist}/catalog.d.ts.map +1 -1
- package/{build → dist}/catalog.js +6 -7
- package/dist/config.d.ts.map +1 -0
- package/{build → dist}/config.js +0 -1
- package/{build → dist}/distributions.d.ts.map +1 -1
- package/{build → dist}/distributions.js +0 -1
- package/{build → dist}/helpers/logger-pino.d.ts.map +1 -1
- package/{build → dist}/helpers/logger-pino.js +0 -1
- package/{build → dist}/helpers/logger.js +0 -1
- package/{build → dist}/index.d.ts.map +1 -1
- package/{build → dist}/index.js +0 -1
- package/{build → dist}/instrumentation.js +0 -1
- package/{build → dist}/literal.d.ts +1 -1
- package/{build → dist}/literal.d.ts.map +1 -1
- package/{build → dist}/literal.js +7 -7
- package/{build → dist}/lookup/lookup.d.ts.map +1 -1
- package/{build → dist}/lookup/lookup.js +6 -4
- package/dist/query.d.ts.map +1 -0
- package/{build → dist}/query.js +3 -4
- package/{build → dist}/search/query-mode.js +2 -3
- package/{build → dist}/terms.d.ts +1 -1
- package/{build → dist}/terms.js +2 -3
- package/{build/server-test.d.ts → dist/test-utils.d.ts} +1 -1
- package/dist/test-utils.d.ts.map +1 -0
- package/{build/server-test.js → dist/test-utils.js} +8 -3
- package/dist/tsconfig.lib.tsbuildinfo +1 -0
- package/eslint.config.mjs +22 -0
- package/package.json +33 -36
- package/src/catalog.ts +157 -0
- package/src/config.ts +24 -0
- package/src/distributions.ts +94 -0
- package/src/helpers/logger-pino.ts +45 -0
- package/src/helpers/logger.ts +52 -0
- package/src/index.ts +12 -0
- package/src/instrumentation.ts +51 -0
- package/src/literal.ts +42 -0
- package/src/lookup/lookup.ts +147 -0
- package/src/query.ts +247 -0
- package/src/search/query-mode.ts +54 -0
- package/src/terms.ts +141 -0
- package/src/test-utils.ts +207 -0
- package/test/fixtures/terms.ttl +46 -0
- package/test/query.test.ts +67 -0
- package/test/search/query-mode.test.ts +71 -0
- package/tsconfig.json +13 -0
- package/tsconfig.lib.json +20 -0
- package/tsconfig.test.json +27 -0
- package/vite.config.ts +26 -0
- package/build/catalog.js.map +0 -1
- package/build/config.d.ts.map +0 -1
- package/build/config.js.map +0 -1
- package/build/distributions.js.map +0 -1
- package/build/helpers/logger-pino.js.map +0 -1
- package/build/helpers/logger.js.map +0 -1
- package/build/index.js.map +0 -1
- package/build/instrumentation.js.map +0 -1
- package/build/literal.js.map +0 -1
- package/build/lookup/lookup.js.map +0 -1
- package/build/query.d.ts.map +0 -1
- package/build/query.js.map +0 -1
- package/build/search/query-mode.js.map +0 -1
- package/build/server-test.d.ts.map +0 -1
- package/build/server-test.js.map +0 -1
- package/build/terms.js.map +0 -1
- /package/{build → dist}/catalog.d.ts +0 -0
- /package/{build → dist}/config.d.ts +0 -0
- /package/{build → dist}/distributions.d.ts +0 -0
- /package/{build → dist}/helpers/logger-pino.d.ts +0 -0
- /package/{build → dist}/helpers/logger.d.ts +0 -0
- /package/{build → dist}/helpers/logger.d.ts.map +0 -0
- /package/{build → dist}/index.d.ts +0 -0
- /package/{build → dist}/instrumentation.d.ts +0 -0
- /package/{build → dist}/instrumentation.d.ts.map +0 -0
- /package/{build → dist}/lookup/lookup.d.ts +0 -0
- /package/{build → dist}/query.d.ts +0 -0
- /package/{build → dist}/search/query-mode.d.ts +0 -0
- /package/{build → dist}/search/query-mode.d.ts.map +0 -0
- /package/{build → dist}/terms.d.ts.map +0 -0
package/src/query.ts
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import * as Hoek from '@hapi/hoek';
|
|
2
|
+
import Joi from 'joi';
|
|
3
|
+
import { LoggerPino } from './helpers/logger-pino.js';
|
|
4
|
+
import Pino from 'pino';
|
|
5
|
+
import PrettyMilliseconds from 'pretty-ms';
|
|
6
|
+
import * as RDF from '@rdfjs/types';
|
|
7
|
+
import { Bindings } from '@rdfjs/types';
|
|
8
|
+
import { Term, TermsTransformer } from './terms.js';
|
|
9
|
+
import { QueryMode, queryVariants } from './search/query-mode.js';
|
|
10
|
+
import { Dataset, Distribution, IRI } from './catalog.js';
|
|
11
|
+
import { QueryEngine } from '@comunica/query-sparql';
|
|
12
|
+
import { BindingsFactory } from '@comunica/utils-bindings-factory';
|
|
13
|
+
import { DataFactory } from 'rdf-data-factory';
|
|
14
|
+
import { sourceQueriesHistogram } from './instrumentation.js';
|
|
15
|
+
import { config } from './config.js';
|
|
16
|
+
|
|
17
|
+
/**
 * Outcome of querying a single source: either the terms that were found or
 * one of the error results.
 */
export type TermsResult =
  | Terms
  | TimeoutError
  | ServerError;
|
|
18
|
+
|
|
19
|
+
/**
 * Pairs the result of a source query with the time the query took.
 */
export class TermsResponse {
  /** The terms that were found, or the error that occurred instead. */
  readonly result: TermsResult;
  /** Elapsed query time in milliseconds. */
  readonly responseTimeMs: number;

  constructor(result: TermsResult, responseTimeMs: number) {
    this.result = result;
    this.responseTimeMs = responseTimeMs;
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Successful query result: the terms found in one distribution.
 */
export class Terms {
  /** The distribution that was queried. */
  readonly distribution: Distribution;
  /** The terms returned by that distribution. */
  readonly terms: Term[];

  constructor(distribution: Distribution, terms: Term[]) {
    this.distribution = distribution;
    this.terms = terms;
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Base class for failed query results.
 *
 * This is a plain result object: it does not extend the built-in `Error`, and
 * inside this module the name `Error` refers to this class.
 */
export class Error {
  /** The distribution whose query failed. */
  readonly distribution: Distribution;
  /** Description of the failure. */
  readonly message: string;

  constructor(distribution: Distribution, message: string) {
    this.distribution = distribution;
    this.message = message;
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Error result for a source that did not respond within the timeout.
 */
export class TimeoutError extends Error {
  /**
   * @param distribution The distribution that timed out; stored by the base class.
   * @param timeoutMs The timeout that was exceeded; only used to build the message.
   */
  constructor(distribution: Distribution, timeoutMs: number) {
    super(distribution, `Source timed out after ${timeoutMs}ms`);
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Error result for every query failure that is not a timeout.
 */
export class ServerError extends Error {
  // Adds nothing to the base class; the type itself distinguishes the failure kind.
}
|
|
50
|
+
|
|
51
|
+
/**
 * Executes SPARQL search and lookup queries against a distribution's endpoint
 * through Comunica and collects the returned quads into terms.
 */
export class QueryTermsService {
  private readonly logger: Pino.Logger;
  private readonly engine: QueryEngine;

  /**
   * @param options.comunica Query engine to use; a new `QueryEngine` is created when omitted.
   * @param options.logger Logger to use; a default Pino logger is created when omitted.
   */
  constructor(options: { comunica?: QueryEngine; logger?: Pino.Logger } = {}) {
    this.engine = options.comunica ?? new QueryEngine();
    this.logger = options.logger ?? Pino.pino();
  }

  /**
   * Parameterize the SPARQL query’s limit in two ways:
   * - as a pre-bound variable ?limit (for GraphDB’s luc:limit, Wikidata and text:query);
   * - by replacing the #LIMIT# placeholder (for LIMIT 123).
   *
   * @returns The query with the first #LIMIT# placeholder replaced, and a copy
   * of the bindings extended with an xsd:integer `limit` literal.
   */
  parameterizeLimit(args: {
    query: string;
    bindings: Record<string, RDF.Term>;
    limit: number;
  }): { queryWithLimit: string; bindingsWithLimit: Record<string, RDF.Term> } {
    return {
      queryWithLimit: args.query.replace('#LIMIT#', `LIMIT ${args.limit}`),
      bindingsWithLimit: {
        ...args.bindings,
        limit: dataFactory.literal(
          args.limit.toString(),
          dataFactory.namedNode('http://www.w3.org/2001/XMLSchema#integer'),
        ),
      },
    };
  }

  /**
   * Run the distribution's search query for a user-supplied search string.
   *
   * @param searchQuery The text to search for.
   * @param queryMode Determines which query variants are bound (see `queryVariants`).
   * @param dataset The dataset being searched; its IRI is bound to `?datasetUri`.
   * @param distribution The distribution whose search query and endpoint are used.
   * @param limit Maximum number of results, applied via `parameterizeLimit`.
   * @param timeoutMs Query timeout in milliseconds.
   */
  async search(
    searchQuery: string,
    queryMode: QueryMode,
    dataset: Dataset,
    distribution: Distribution,
    limit: number,
    timeoutMs: number,
  ): Promise<TermsResponse> {
    // Every query variant becomes a pre-bound literal with the variant's name.
    const bindings: Record<string, RDF.Term> = {};
    for (const [name, value] of queryVariants(searchQuery, queryMode)) {
      bindings[name] = dataFactory.literal(value);
    }
    bindings['datasetUri'] = dataFactory.namedNode(dataset.iri.toString());

    const { queryWithLimit, bindingsWithLimit } = this.parameterizeLimit({
      query: distribution.searchQuery,
      bindings,
      limit,
    });

    return this.run(
      // For plain SPARQL LIMIT (LIMIT 123) that cannot be pre-bound
      queryWithLimit,
      distribution,
      timeoutMs,
      bindingsWithLimit,
    );
  }

  /**
   * Run the distribution's lookup query for the given IRIs.
   *
   * The first `?uris` placeholder in the lookup query is replaced by the IRIs,
   * each wrapped in angle brackets and separated by spaces.
   */
  async lookup(
    iris: IRI[],
    distribution: Distribution,
    timeoutMs: number,
  ): Promise<TermsResponse> {
    const values = iris.map((iri) => `<${iri}>`).join(' ');

    return this.run(
      // A replacer function inserts the IRIs verbatim. With a replacement
      // string, sequences such as `$&`, `$'` or `$$` inside an IRI would be
      // interpreted as replacement patterns and corrupt the query.
      distribution.lookupQuery.replace('?uris', () => values),
      distribution,
      timeoutMs,
    );
  }

  /**
   * Execute a CONSTRUCT-style query against the distribution's endpoint and
   * resolve with the resulting terms or an error result.
   *
   * Stream errors do not reject the returned promise: they resolve it with a
   * `TimeoutError` or `ServerError` result. Every outcome is recorded in
   * `sourceQueriesHistogram`.
   *
   * @param query The SPARQL query to execute.
   * @param distribution The distribution to query.
   * @param timeoutMs HTTP timeout in milliseconds; validated with Joi before use.
   * @param bindings Initial variable bindings for the query.
   */
  async run(
    query: string,
    distribution: Distribution,
    timeoutMs: number,
    bindings: Record<string, RDF.Term> = {},
  ): Promise<TermsResponse> {
    Joi.attempt(
      timeoutMs,
      Joi.number()
        .integer()
        .min(1)
        .max(config.MAX_QUERY_TIMEOUT)
        .default(config.DEFAULT_QUERY_TIMEOUT),
    );

    const timer = new Hoek.Bench();
    const logger = new LoggerPino({ logger: this.logger });
    // Extract HTTP credentials if the distribution URL contains any.
    const url = new URL(distribution.endpoint.toString());
    // Credential-free copy of the endpoint, so the logged endpoint does not
    // expose the user name and password.
    const loggableUrl = new URL(url.href);
    loggableUrl.username = '';
    loggableUrl.password = '';

    this.logger.info(`Querying "${loggableUrl}" with "${query}"...`);
    const quadStream = await this.engine.queryQuads(query, {
      log: logger,
      httpAuth: url.username === '' ? '' : url.username + ':' + url.password,
      httpTimeout: timeoutMs,
      noCache: true,
      sources: [
        {
          type: 'sparql',
          value: url.origin + url.pathname,
        },
      ],
      initialBindings: bindingsFactory.fromRecord(
        bindings,
      ) as unknown as Bindings,
    });

    return new Promise((resolve) => {
      const termsTransformer = new TermsTransformer();
      quadStream.on('error', (error) => {
        // Measured once so the metric and the response report the same value.
        const elapsed = Math.round(timer.elapsed());
        // NOTE(review): the error itself may still contain the endpoint URL.
        this.logger.error(
          `An error occurred when querying "${loggableUrl}": ${error} with %o`,
          error,
        );

        if (error.message.startsWith('Fetch timed out')) {
          sourceQueriesHistogram.record(elapsed, {
            distribution: distribution.iri.toString(),
            error: 'TimeoutError',
          });
          resolve(
            new TermsResponse(
              new TimeoutError(distribution, timeoutMs),
              elapsed,
            ),
          );
        } else {
          sourceQueriesHistogram.record(elapsed, {
            distribution: distribution.iri.toString(),
            error: 'ServerError',
          });
          resolve(
            new TermsResponse(
              new ServerError(
                distribution,
                obfuscateHttpCredentials(error.message),
              ),
              elapsed,
            ),
          );
        }
      });
      quadStream.on('data', (quad: RDF.Quad) => {
        termsTransformer.fromQuad(quad);
      });
      quadStream.on('end', () => {
        const terms = termsTransformer
          .asArray()
          .sort(byScoreThenAlphabetically);
        // Measured once so log, metric and response report the same value.
        const elapsed = timer.elapsed();
        this.logger.info(
          `Found ${terms.length} terms matching "${query}" in "${loggableUrl}" in ${PrettyMilliseconds(elapsed)}`,
        );
        sourceQueriesHistogram.record(Math.round(elapsed), {
          distribution: distribution.iri.toString(),
        });
        resolve(
          new TermsResponse(
            new Terms(distribution, terms),
            Math.round(elapsed),
          ),
        );
      });
    });
  }
}
|
|
222
|
+
|
|
223
|
+
/**
 * Sort comparator: terms with a higher score come first; terms with equal
 * scores are ordered alphabetically by their labels.
 *
 * A missing score counts as 0. A score that does not parse as a number counts
 * as 0 too: a NaN score would otherwise make the comparator return -1 for
 * both argument orders and leave the sort order undefined.
 */
const byScoreThenAlphabetically = (a: Term, b: Term) => {
  const numericScore = (term: Term) => {
    const parsed = parseFloat(term.score?.value ?? '0');
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  const scoreA = numericScore(a);
  const scoreB = numericScore(b);
  if (scoreA === scoreB) {
    return alphabeticallyByLabels(a, b);
  }

  return scoreA < scoreB ? 1 : -1;
};
|
|
232
|
+
|
|
233
|
+
/**
 * Sort comparator that orders terms by their first preferred label directly
 * followed by their first alternative label, using locale-aware comparison.
 * A missing label contributes an empty string.
 */
const alphabeticallyByLabels = (a: Term, b: Term) => {
  const sortLabel = (term: Term) =>
    (term.prefLabels[0]?.value ?? '') + (term.altLabels[0]?.value ?? '');

  return sortLabel(a).localeCompare(sortLabel(b));
};
|
|
242
|
+
|
|
243
|
+
const dataFactory = new DataFactory();
|
|
244
|
+
const bindingsFactory = new BindingsFactory(dataFactory);
|
|
245
|
+
|
|
246
|
+
/**
 * Replace the credentials in every http(s) URL inside a message with `***`.
 *
 * Only the user-info part of a URL is matched: the characters between the
 * scheme and the last `@` that precedes the first `/`, `?`, `#` or whitespace.
 * URLs without credentials and `@` signs elsewhere in the message (for example
 * language tags in a query) are left untouched.
 */
const obfuscateHttpCredentials = (message: string) =>
  message.replace(/(https?):\/\/[^\s/?#]+@/g, '$1://***@');
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * How a user-supplied search string is turned into query variants
 * (see `queryVariants`).
 */
export enum QueryMode {
  /** Pass the search string on unchanged. */
  RAW = 'raw',

  /** Normalize the search string before it is passed on. */
  OPTIMIZED = 'optimized',
}
|
|
5
|
+
|
|
6
|
+
/**
 * Build the named variants of a search string for the given mode.
 *
 * The result has the keys `query` and `virtuosoQuery`. In RAW mode both hold
 * the search string unchanged; in OPTIMIZED mode `query` holds the normalized
 * search string and `virtuosoQuery` the normalized string converted by
 * `virtuosoQuery`.
 */
export function queryVariants(query: string, type: QueryMode) {
  const variants = new Map<string, string>();

  switch (type) {
    case QueryMode.RAW:
      variants.set('query', query);
      variants.set('virtuosoQuery', query);
      return variants;
    case QueryMode.OPTIMIZED: {
      const normalized = stringQuery(query);
      variants.set('query', normalized);
      variants.set('virtuosoQuery', virtuosoQuery(normalized));
      return variants;
    }
  }
}
|
|
20
|
+
|
|
21
|
+
/**
 * Normalize a search string: lower-case it, strip leading and trailing
 * whitespace and collapse every run of whitespace into a single space.
 */
const stringQuery = (query: string) => {
  const words = query.toLowerCase().trim().split(/\s+/);
  return words.join(' ');
};
|
|
23
|
+
|
|
24
|
+
/**
 * Convert a search string into the `virtuosoQuery` variant: escape single
 * quotes, split on whitespace, drop stop words, quote every part that is not
 * a boolean operator and connect the parts with AND where no operator is given.
 */
const virtuosoQuery = (query: string) => {
  const parts = split(escape(query));
  const quotedParts = quote(filterStopWords(parts));
  return join(quotedParts);
};
|
|
26
|
+
|
|
27
|
+
/** Put a backslash in front of every single quote in the query. */
const escape = (query: string) => query.split("'").join("\\'");
|
|
28
|
+
|
|
29
|
+
/** Split the query into parts on every run of whitespace. */
const split = (query: string) => {
  const whitespace = /\s+/;
  return query.split(whitespace);
};
|
|
30
|
+
|
|
31
|
+
/**
 * Wrap every query part in single quotes, except boolean operators, which are
 * passed through as they are.
 */
const quote = (queryParts: string[]) => {
  const quoted: string[] = [];
  for (const part of queryParts) {
    quoted.push(isBooleanOperator(part) ? part : `'${part}'`);
  }
  return quoted;
};
|
|
36
|
+
|
|
37
|
+
/** Drop the query parts that are stop words; currently only `&`. */
const filterStopWords = (queryParts: string[]) => {
  const kept: string[] = [];
  for (const part of queryParts) {
    if (part !== '&') {
      kept.push(part);
    }
  }
  return kept;
};
|
|
39
|
+
|
|
40
|
+
/**
 * Join query parts with boolean AND if they are not yet connected with a boolean.
 *
 * Returns an empty string when there are no parts, which happens when the
 * search string consists of stop words only. Reducing without an initial
 * value would throw a TypeError in that case.
 */
const join = (queryParts: string[]) =>
  queryParts.reduce((joined, part, index, parts) => {
    // The first part starts the result; there is nothing to connect it to yet.
    if (index === 0) {
      return part;
    }

    const previous = parts[index - 1];
    if (!isBooleanOperator(previous) && !isBooleanOperator(part)) {
      return `${joined} AND ${part}`;
    }

    return `${joined} ${part}`;
  }, '');
|
|
52
|
+
|
|
53
|
+
/** Whether the query part is the boolean operator `and` or `or`, in any letter case. */
const isBooleanOperator = (maybeBool: string) => {
  const lowerCased = maybeBool.toLowerCase();
  return ['and', 'or'].includes(lowerCased);
};
|
package/src/terms.ts
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import type RDF from '@rdfjs/types';
|
|
2
|
+
|
|
3
|
+
/**
 * A term as returned to callers: its identifier, labels, notes and relations
 * to other terms.
 */
export class Term {
  readonly id: RDF.Term;
  readonly type: RDF.Term | undefined;
  readonly prefLabels: RDF.Literal[];
  readonly altLabels: RDF.Literal[];
  readonly hiddenLabels: RDF.Literal[];
  readonly scopeNotes: RDF.Literal[];
  readonly seeAlso: RDF.NamedNode[];
  readonly broaderTerms: RelatedTerm[];
  readonly narrowerTerms: RelatedTerm[];
  readonly relatedTerms: RelatedTerm[];
  readonly exactMatches: RelatedTerm[];
  /** Filled from the term's `inScheme` value by `TermsTransformer`. */
  readonly datasetIri: RDF.Term | undefined;
  /** Filled from the term's `vrank:simpleRank` value by `TermsTransformer`. */
  readonly score: RDF.Literal | undefined;

  constructor(
    id: RDF.Term,
    type: RDF.Term | undefined,
    prefLabels: RDF.Literal[],
    altLabels: RDF.Literal[],
    hiddenLabels: RDF.Literal[],
    scopeNotes: RDF.Literal[],
    seeAlso: RDF.NamedNode[],
    broaderTerms: RelatedTerm[],
    narrowerTerms: RelatedTerm[],
    relatedTerms: RelatedTerm[],
    exactMatches: RelatedTerm[],
    datasetIri: RDF.Term | undefined,
    score: RDF.Literal | undefined,
  ) {
    this.id = id;
    this.type = type;
    this.prefLabels = prefLabels;
    this.altLabels = altLabels;
    this.hiddenLabels = hiddenLabels;
    this.scopeNotes = scopeNotes;
    this.seeAlso = seeAlso;
    this.broaderTerms = broaderTerms;
    this.narrowerTerms = narrowerTerms;
    this.relatedTerms = relatedTerms;
    this.exactMatches = exactMatches;
    this.datasetIri = datasetIri;
    this.score = score;
  }
}
|
|
20
|
+
|
|
21
|
+
/**
 * A term referred to by another term, reduced to its identifier and preferred
 * labels.
 */
export class RelatedTerm {
  readonly id: RDF.Term;
  readonly prefLabels: RDF.Literal[];

  constructor(id: RDF.Term, prefLabels: RDF.Literal[]) {
    this.id = id;
    this.prefLabels = prefLabels;
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Mutable accumulator for everything collected about one subject while the
 * quads of a query result are being read.
 */
class SparqlResultTerm {
  // Single-valued properties: a later quad replaces the earlier value.
  type: RDF.Term | undefined = undefined;
  inScheme: RDF.Term | undefined = undefined;
  score: RDF.Literal | undefined = undefined;

  // Multi-valued properties: values are appended.
  prefLabels: RDF.Literal[] = [];
  altLabels: RDF.Literal[] = [];
  hiddenLabels: RDF.Literal[] = [];
  scopeNotes: RDF.Literal[] = [];
  seeAlso: RDF.NamedNode[] = [];

  // Relations hold the referenced terms' identifiers only.
  broaderTerms: RDF.Term[] = [];
  narrowerTerms: RDF.Term[] = [];
  relatedTerms: RDF.Term[] = [];
  exactMatches: RDF.Term[] = [];

  constructor(readonly id: RDF.Term) {}
}
|
|
43
|
+
|
|
44
|
+
/**
 * Collects the quads of a query result per subject and turns the subjects
 * that are SKOS concepts into `Term` objects.
 */
export class TermsTransformer {
  /** IRIs of the subjects typed as skos:Concept, in order of first appearance. */
  private termsIris: Set<string> = new Set();
  /** Everything collected so far, keyed by subject IRI. */
  private termsMap: Map<string, SparqlResultTerm> = new Map();
  /**
   * Maps a predicate IRI to the `SparqlResultTerm` property that stores its values.
   *
   * NOTE(review): `inScheme` is the only SKOS property mapped for the
   * 2004/02 namespace alone; confirm whether the 2008/05 variant is left out
   * on purpose.
   */
  private readonly predicateToPropertyMap = new Map<string, string>([
    ['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'type'],
    ['http://www.w3.org/2000/01/rdf-schema#seeAlso', 'seeAlso'],
    ['http://www.w3.org/2004/02/skos/core#prefLabel', 'prefLabels'],
    ['http://www.w3.org/2008/05/skos#prefLabel', 'prefLabels'],
    ['http://www.w3.org/2004/02/skos/core#altLabel', 'altLabels'],
    ['http://www.w3.org/2008/05/skos#altLabel', 'altLabels'],
    ['http://www.w3.org/2004/02/skos/core#hiddenLabel', 'hiddenLabels'],
    ['http://www.w3.org/2008/05/skos#hiddenLabel', 'hiddenLabels'],
    ['http://www.w3.org/2004/02/skos/core#scopeNote', 'scopeNotes'],
    ['http://www.w3.org/2008/05/skos#scopeNote', 'scopeNotes'],
    ['http://www.w3.org/2004/02/skos/core#broader', 'broaderTerms'],
    ['http://www.w3.org/2008/05/skos#broader', 'broaderTerms'],
    ['http://www.w3.org/2004/02/skos/core#narrower', 'narrowerTerms'],
    ['http://www.w3.org/2008/05/skos#narrower', 'narrowerTerms'],
    ['http://www.w3.org/2004/02/skos/core#related', 'relatedTerms'],
    ['http://www.w3.org/2008/05/skos#related', 'relatedTerms'],
    ['http://www.w3.org/2004/02/skos/core#exactMatch', 'exactMatches'],
    ['http://www.w3.org/2008/05/skos#exactMatch', 'exactMatches'],
    ['http://www.w3.org/2004/02/skos/core#inScheme', 'inScheme'],
    ['http://purl.org/voc/vrank#simpleRank', 'score'],
  ]);

  /**
   * Record one quad.
   *
   * Every subject gets an entry, even when the predicate is not mapped; the
   * object of a mapped predicate is stored on that entry.
   */
  fromQuad(quad: RDF.Quad): void {
    const { subject, predicate, object } = quad;
    const subjectIri = subject.value;

    let currentTerm = this.termsMap.get(subjectIri);
    if (currentTerm === undefined) {
      currentTerm = new SparqlResultTerm(subject);
      this.termsMap.set(subjectIri, currentTerm);
    }

    const propertyName = this.predicateToPropertyMap.get(predicate.value);
    if (propertyName === undefined) {
      return;
    }

    // skos:Concepts are the top-level search results, which we track in termsIris.
    const isConcept =
      object.value === 'http://www.w3.org/2004/02/skos/core#Concept' ||
      object.value === 'http://www.w3.org/2008/05/skos#Concept';
    if (propertyName === 'type' && isConcept) {
      this.termsIris.add(subjectIri);
    }

    const propertyValue = (currentTerm as any)[propertyName]; // eslint-disable-line @typescript-eslint/no-explicit-any
    if (!Array.isArray(propertyValue)) {
      // Single-valued property: the latest quad wins.
      (currentTerm as any)[propertyName] = object; // eslint-disable-line @typescript-eslint/no-explicit-any
      return;
    }

    // Prevent duplicate prefLabel values due to duplicate quads.
    const alreadyPresent = propertyValue.some((value) => value.equals(object));
    if (!alreadyPresent) {
      propertyValue.push(object);
    }
  }

  /**
   * Build the terms for all subjects recorded as SKOS concepts. The related
   * terms of each term are sorted alphabetically by preferred label.
   */
  asArray(): Term[] {
    return Array.from(this.termsIris, (iri) => {
      const term = this.termsMap.get(iri)!;
      const sortedRelations = (relations: RDF.Term[]) =>
        this.mapRelatedTerms(relations).sort(alphabeticallyByPrefLabel);

      return new Term(
        term.id,
        term.type,
        term.prefLabels,
        term.altLabels,
        term.hiddenLabels,
        term.scopeNotes,
        term.seeAlso,
        sortedRelations(term.broaderTerms),
        sortedRelations(term.narrowerTerms),
        sortedRelations(term.relatedTerms),
        sortedRelations(term.exactMatches),
        term.inScheme,
        term.score,
      );
    });
  }

  /**
   * Map related IRIs to their related terms.
   *
   * Related terms can be incomplete because of the SPARQL query limit (see
   * https://github.com/netwerk-digitaal-erfgoed/network-of-terms/issues/36).
   * Such terms are still included, with an empty list of preferred labels.
   */
  private mapRelatedTerms = (terms: RDF.Term[]) =>
    terms.map((iri: RDF.Term) => {
      const collected = this.termsMap.get(iri.value);
      return new RelatedTerm(iri, collected?.prefLabels ?? []);
    });
}
|
|
136
|
+
|
|
137
|
+
/**
 * Sort comparator that orders related terms by their first preferred label,
 * using locale-aware comparison. A term without preferred label sorts as an
 * empty string.
 */
const alphabeticallyByPrefLabel = (a: RelatedTerm, b: RelatedTerm) => {
  const firstPrefLabel = (term: RelatedTerm) => term.prefLabels[0]?.value ?? '';

  return firstPrefLabel(a).localeCompare(firstPrefLabel(b));
};
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Catalog,
|
|
3
|
+
Dataset,
|
|
4
|
+
Feature,
|
|
5
|
+
FeatureType,
|
|
6
|
+
Organization,
|
|
7
|
+
SparqlDistribution,
|
|
8
|
+
} from './index.js';
|
|
9
|
+
import { setup, teardown as teardownServer } from 'jest-dev-server';
|
|
10
|
+
import { dirname } from 'path';
|
|
11
|
+
import { fileURLToPath } from 'url';
|
|
12
|
+
import { SpawndChildProcess } from 'spawnd';
|
|
13
|
+
import nock from 'nock';
|
|
14
|
+
|
|
15
|
+
// Stub for the timeout test distribution: every POST to
// https://example.com/distributions/timeout is answered with status 200 after
// a delay of 3000, and the stub stays active for repeated requests.
const timeoutRequest = nock('https://example.com').post(
  '/distributions/timeout',
);
timeoutRequest.delay(3000).reply(200).persist();
|
|
20
|
+
|
|
21
|
+
/**
 * Stop the SPARQL endpoint started by `startDistributionSparqlEndpoint`.
 *
 * Does nothing when no endpoint was started: `servers` is then still
 * unassigned and must not be handed to jest-dev-server.
 */
export const teardown = async () => {
  if (servers === undefined) {
    return;
  }

  await teardownServer(servers);
};
|
|
24
|
+
|
|
25
|
+
let servers: SpawndChildProcess[];
|
|
26
|
+
export const testCatalog = (port: number) =>
|
|
27
|
+
new Catalog([
|
|
28
|
+
new Dataset(
|
|
29
|
+
'https://data.rkd.nl/rkdartists',
|
|
30
|
+
{ nl: 'RKDartists', en: 'RKDartists' },
|
|
31
|
+
{
|
|
32
|
+
nl: 'Biografische gegevens van Nederlandse en buitenlandse kunstenaars van de middeleeuwen tot heden',
|
|
33
|
+
},
|
|
34
|
+
['https://data.cultureelerfgoed.nl/termennetwerk/onderwerpen/Personen'],
|
|
35
|
+
['https://example.com/resources/'],
|
|
36
|
+
'https://example.com/rkdartists',
|
|
37
|
+
['en', 'nl'],
|
|
38
|
+
[
|
|
39
|
+
new Organization(
|
|
40
|
+
'https://rkd.nl',
|
|
41
|
+
{ nl: 'RKD – Nederlands Instituut voor Kunstgeschiedenis' },
|
|
42
|
+
{ nl: 'RKD' },
|
|
43
|
+
),
|
|
44
|
+
],
|
|
45
|
+
[
|
|
46
|
+
new SparqlDistribution(
|
|
47
|
+
'https://data.netwerkdigitaalerfgoed.nl/rkd/rkdartists/sparql',
|
|
48
|
+
`http://localhost:${port}/sparql`,
|
|
49
|
+
`
|
|
50
|
+
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
|
51
|
+
CONSTRUCT {
|
|
52
|
+
?s ?p ?o
|
|
53
|
+
}
|
|
54
|
+
WHERE {
|
|
55
|
+
{
|
|
56
|
+
SELECT DISTINCT ?s WHERE {
|
|
57
|
+
?s ?labelPredicate ?label .
|
|
58
|
+
VALUES ?labelPredicate { skos:prefLabel skos:altLabel skos:hiddenLabel }
|
|
59
|
+
FILTER (regex(?label, ?query, "i"))
|
|
60
|
+
}
|
|
61
|
+
#LIMIT#
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
?s ?p ?o .
|
|
65
|
+
|
|
66
|
+
OPTIONAL {
|
|
67
|
+
?s skos:exactMatch ?match .
|
|
68
|
+
?match skos:prefLabel ?match_label .
|
|
69
|
+
}
|
|
70
|
+
}`,
|
|
71
|
+
`
|
|
72
|
+
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
|
73
|
+
CONSTRUCT {
|
|
74
|
+
?s ?p ?o ;
|
|
75
|
+
skos:broader ?broader_uri ;
|
|
76
|
+
skos:narrower ?narrower_uri ;
|
|
77
|
+
skos:related ?related_uri ;
|
|
78
|
+
skos:inScheme <https://data.rkd.nl/rkdartists> .
|
|
79
|
+
?broader_uri skos:prefLabel ?broader_prefLabel .
|
|
80
|
+
?narrower_uri skos:prefLabel ?narrower_prefLabel .
|
|
81
|
+
?related_uri skos:prefLabel ?related_prefLabel .
|
|
82
|
+
}
|
|
83
|
+
WHERE {
|
|
84
|
+
?s ?p ?o.
|
|
85
|
+
VALUES ?s { ?uris }
|
|
86
|
+
OPTIONAL {
|
|
87
|
+
?s skos:broader ?broader_uri.
|
|
88
|
+
?broader_uri skos:prefLabel ?broader_prefLabel.
|
|
89
|
+
}
|
|
90
|
+
OPTIONAL {
|
|
91
|
+
?s skos:narrower ?narrower_uri.
|
|
92
|
+
?narrower_uri skos:prefLabel ?narrower_prefLabel.
|
|
93
|
+
}
|
|
94
|
+
OPTIONAL {
|
|
95
|
+
?s skos:related ?related_uri.
|
|
96
|
+
?related_uri skos:prefLabel ?related_prefLabel.
|
|
97
|
+
}
|
|
98
|
+
}`,
|
|
99
|
+
[
|
|
100
|
+
new Feature(
|
|
101
|
+
FeatureType.RECONCILIATION,
|
|
102
|
+
new URL('https://example.com/reconcile/rkd'),
|
|
103
|
+
),
|
|
104
|
+
],
|
|
105
|
+
),
|
|
106
|
+
],
|
|
107
|
+
{ nl: 'RKD' },
|
|
108
|
+
),
|
|
109
|
+
new Dataset(
|
|
110
|
+
'https://data.cultureelerfgoed.nl/term/id/cht',
|
|
111
|
+
{ nl: 'Cultuurhistorische Thesaurus' },
|
|
112
|
+
{ nl: 'Onderwerpen voor het beschrijven van cultureel erfgoed' },
|
|
113
|
+
[
|
|
114
|
+
'https://data.cultureelerfgoed.nl/termennetwerk/onderwerpen/Abstracte-begrippen',
|
|
115
|
+
],
|
|
116
|
+
['https://data.cultureelerfgoed.nl/term/id/cht/'],
|
|
117
|
+
'https://example.com/cht',
|
|
118
|
+
['nl'],
|
|
119
|
+
[
|
|
120
|
+
new Organization(
|
|
121
|
+
'https://www.cultureelerfgoed.nl',
|
|
122
|
+
{ nl: 'Rijksdienst voor het Cultureel Erfgoed' },
|
|
123
|
+
{ nl: 'RCE' },
|
|
124
|
+
),
|
|
125
|
+
],
|
|
126
|
+
[
|
|
127
|
+
new SparqlDistribution(
|
|
128
|
+
'https://example.com/distributions/endpoint-error',
|
|
129
|
+
'http://does-not-resolve',
|
|
130
|
+
'CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }',
|
|
131
|
+
'CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }',
|
|
132
|
+
[
|
|
133
|
+
new Feature(
|
|
134
|
+
FeatureType.RECONCILIATION,
|
|
135
|
+
new URL('https://example.com/reconcile/cht'),
|
|
136
|
+
),
|
|
137
|
+
],
|
|
138
|
+
),
|
|
139
|
+
],
|
|
140
|
+
),
|
|
141
|
+
new Dataset(
|
|
142
|
+
'http://vocab.getty.edu/aat',
|
|
143
|
+
{ nl: 'Art & Architecture Thesaurus' },
|
|
144
|
+
{
|
|
145
|
+
nl: 'Onderwerpen voor het beschrijven van architectuur-, kunst- en cultuurhistorische collecties',
|
|
146
|
+
},
|
|
147
|
+
[
|
|
148
|
+
'https://data.cultureelerfgoed.nl/termennetwerk/onderwerpen/Abstracte-begrippen',
|
|
149
|
+
],
|
|
150
|
+
['http://vocab.getty.edu/aat/'],
|
|
151
|
+
'https://example.com/aat',
|
|
152
|
+
['nl'],
|
|
153
|
+
[
|
|
154
|
+
new Organization(
|
|
155
|
+
'http://www.getty.edu/research/',
|
|
156
|
+
{ nl: 'Getty Research Institute' },
|
|
157
|
+
{ nl: 'Getty' },
|
|
158
|
+
),
|
|
159
|
+
],
|
|
160
|
+
[
|
|
161
|
+
new SparqlDistribution(
|
|
162
|
+
'https://example.com/distributions/timeout',
|
|
163
|
+
'https://example.com/distributions/timeout',
|
|
164
|
+
'CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }',
|
|
165
|
+
'CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }',
|
|
166
|
+
),
|
|
167
|
+
],
|
|
168
|
+
),
|
|
169
|
+
new Dataset(
|
|
170
|
+
'http://data.beeldengeluid.nl/gtaa/Persoonsnamen',
|
|
171
|
+
{ nl: 'GTAA: persoonsnamen' },
|
|
172
|
+
{ nl: 'Personen voor het beschrijven van audiovisueel materiaal' },
|
|
173
|
+
[
|
|
174
|
+
'https://data.cultureelerfgoed.nl/termennetwerk/onderwerpen/Abstracte-begrippen',
|
|
175
|
+
],
|
|
176
|
+
['http://data.beeldengeluid.nl/gtaa/'],
|
|
177
|
+
'https://example.com/gtaa',
|
|
178
|
+
['nl'],
|
|
179
|
+
[
|
|
180
|
+
new Organization(
|
|
181
|
+
'https://www.beeldengeluid.nl/',
|
|
182
|
+
{ nl: 'Nederlands Instituut voor Beeld en Geluid' },
|
|
183
|
+
{ nl: 'Beeld en Geluid' },
|
|
184
|
+
),
|
|
185
|
+
],
|
|
186
|
+
[
|
|
187
|
+
new SparqlDistribution(
|
|
188
|
+
'https://data.beeldengeluid.nl/id/datadownload/0026',
|
|
189
|
+
'https://username:password@gtaa.apis.beeldengeluid.nl/sparql',
|
|
190
|
+
'CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }',
|
|
191
|
+
'CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }',
|
|
192
|
+
),
|
|
193
|
+
],
|
|
194
|
+
),
|
|
195
|
+
]);
|
|
196
|
+
|
|
197
|
+
/**
 * Start a local SPARQL endpoint that serves the `terms.ttl` test fixture.
 *
 * The started processes are kept in `servers` so that `teardown` can stop them.
 *
 * @param port The port the endpoint listens on.
 */
export async function startDistributionSparqlEndpoint(
  port: number,
): Promise<void> {
  // The fixture is located relative to the directory of this module.
  const moduleDirectory = dirname(fileURLToPath(import.meta.url));
  const fixturePath = `${moduleDirectory}/../test/fixtures/terms.ttl`;

  servers = await setup({
    command: `npx --no comunica-sparql-file-http ${fixturePath} -p ${port}`,
    port,
    launchTimeout: 20000,
  });
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
|
|
2
|
+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
|
|
3
|
+
@prefix vrank: <http://purl.org/voc/vrank#> .
|
|
4
|
+
|
|
5
|
+
<https://example.com/resources/artwork>
|
|
6
|
+
a skos:Concept ;
|
|
7
|
+
skos:prefLabel "Nachtwacht"@nl, "The Night Watch"@en ;
|
|
8
|
+
skos:altLabel "Nachtwacht alt"@nl, "Night Watch alt"@en ;
|
|
9
|
+
skos:scopeNote "One of the most famous Dutch paintings"@en ;
|
|
10
|
+
skos:hiddenLabel "art" ;
|
|
11
|
+
skos:broader <https://example.com/resources/2> ;
|
|
12
|
+
skos:related <https://example.com/resources/painter> ,
|
|
13
|
+
<https://example.com/resources/painting> ,
|
|
14
|
+
<https://example.com/resources/art> ;
|
|
15
|
+
skos:exactMatch <https://example.com/resources/match> ;
|
|
16
|
+
skos:ignored "Not used by Network of Terms" ;
|
|
17
|
+
rdfs:seeAlso <https://example.com/html/artwork> ;
|
|
18
|
+
vrank:simpleRank 18.0 .
|
|
19
|
+
|
|
20
|
+
<https://example.com/resources/painting>
|
|
21
|
+
a <http://www.w3.org/2008/05/skos#Concept> ;
|
|
22
|
+
skos:altLabel
|
|
23
|
+
"painted things that can be beautiful"@en ,
|
|
24
|
+
"another altLabel"@en ,
|
|
25
|
+
"mooie geschilderde dingen"@nl ,
|
|
26
|
+
"en nog meer"@nl ;
|
|
27
|
+
skos:related <https://example.com/resources/art> ;
|
|
28
|
+
skos:broader <https://example.com/resources/art> .
|
|
29
|
+
|
|
30
|
+
<https://example.com/resources/painter>
|
|
31
|
+
a <http://www.w3.org/2008/05/skos#Concept> ;
|
|
32
|
+
skos:prefLabel "Rembrandt"@en ;
|
|
33
|
+
vrank:simpleRank 20.5 .
|
|
34
|
+
|
|
35
|
+
<https://example.com/resources/art>
|
|
36
|
+
a skos:Concept ;
|
|
37
|
+
skos:prefLabel "All things art"@en, "Kunstige dingen" ; # last value is fallback and has no language tag on purpose
|
|
38
|
+
skos:altLabel "Art"@en ;
|
|
39
|
+
skos:narrower <https://example.com/resources/artwork> .
|
|
40
|
+
|
|
41
|
+
<https://example.com/resources/alphabet>
|
|
42
|
+
a skos:Concept ;
|
|
43
|
+
skos:altLabel "Resource without prefLabel"@en .
|
|
44
|
+
|
|
45
|
+
<https://example.com/resources/match>
|
|
46
|
+
skos:prefLabel "Exact match"@en .
|