@netwerk-digitaal-erfgoed/network-of-terms-query 6.2.8 → 6.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/{build → dist}/catalog.d.ts.map +1 -1
- package/{build → dist}/catalog.js +6 -7
- package/dist/config.d.ts.map +1 -0
- package/{build → dist}/config.js +0 -1
- package/{build → dist}/distributions.d.ts.map +1 -1
- package/{build → dist}/distributions.js +0 -1
- package/{build → dist}/helpers/logger-pino.d.ts.map +1 -1
- package/{build → dist}/helpers/logger-pino.js +0 -1
- package/{build → dist}/helpers/logger.js +0 -1
- package/{build → dist}/index.d.ts.map +1 -1
- package/{build → dist}/index.js +0 -1
- package/{build → dist}/instrumentation.js +0 -1
- package/{build → dist}/literal.d.ts +1 -1
- package/{build → dist}/literal.d.ts.map +1 -1
- package/{build → dist}/literal.js +7 -7
- package/{build → dist}/lookup/lookup.d.ts.map +1 -1
- package/{build → dist}/lookup/lookup.js +6 -4
- package/dist/query.d.ts.map +1 -0
- package/{build → dist}/query.js +58 -8
- package/{build → dist}/search/query-mode.js +2 -3
- package/{build → dist}/terms.d.ts +1 -1
- package/{build → dist}/terms.js +2 -3
- package/{build/server-test.d.ts → dist/test-utils.d.ts} +1 -1
- package/dist/test-utils.d.ts.map +1 -0
- package/{build/server-test.js → dist/test-utils.js} +2 -3
- package/dist/tsconfig.lib.tsbuildinfo +1 -0
- package/eslint.config.mjs +22 -0
- package/package.json +37 -41
- package/src/catalog.ts +157 -0
- package/src/config.ts +24 -0
- package/src/distributions.ts +94 -0
- package/src/helpers/logger-pino.ts +45 -0
- package/src/helpers/logger.ts +52 -0
- package/src/index.ts +12 -0
- package/src/instrumentation.ts +51 -0
- package/src/literal.ts +42 -0
- package/src/lookup/lookup.ts +147 -0
- package/src/query.ts +302 -0
- package/src/search/query-mode.ts +54 -0
- package/src/terms.ts +141 -0
- package/src/test-utils.ts +207 -0
- package/test/fixtures/terms.ttl +46 -0
- package/test/query.test.ts +68 -0
- package/test/search/query-mode.test.ts +71 -0
- package/tsconfig.json +13 -0
- package/tsconfig.lib.json +20 -0
- package/tsconfig.test.json +27 -0
- package/vite.config.ts +26 -0
- package/build/catalog.js.map +0 -1
- package/build/config.d.ts.map +0 -1
- package/build/config.js.map +0 -1
- package/build/distributions.js.map +0 -1
- package/build/helpers/logger-pino.js.map +0 -1
- package/build/helpers/logger.js.map +0 -1
- package/build/index.js.map +0 -1
- package/build/instrumentation.js.map +0 -1
- package/build/literal.js.map +0 -1
- package/build/lookup/lookup.js.map +0 -1
- package/build/query.d.ts.map +0 -1
- package/build/query.js.map +0 -1
- package/build/search/query-mode.js.map +0 -1
- package/build/server-test.d.ts.map +0 -1
- package/build/server-test.js.map +0 -1
- package/build/terms.js.map +0 -1
- /package/{build → dist}/catalog.d.ts +0 -0
- /package/{build → dist}/config.d.ts +0 -0
- /package/{build → dist}/distributions.d.ts +0 -0
- /package/{build → dist}/helpers/logger-pino.d.ts +0 -0
- /package/{build → dist}/helpers/logger.d.ts +0 -0
- /package/{build → dist}/helpers/logger.d.ts.map +0 -0
- /package/{build → dist}/index.d.ts +0 -0
- /package/{build → dist}/instrumentation.d.ts +0 -0
- /package/{build → dist}/instrumentation.d.ts.map +0 -0
- /package/{build → dist}/lookup/lookup.d.ts +0 -0
- /package/{build → dist}/query.d.ts +0 -0
- /package/{build → dist}/search/query-mode.d.ts +0 -0
- /package/{build → dist}/search/query-mode.d.ts.map +0 -0
- /package/{build → dist}/terms.d.ts.map +0 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { Catalog, Dataset, Distribution, IRI } from '../catalog.js';
|
|
2
|
+
import {
|
|
3
|
+
Error,
|
|
4
|
+
QueryTermsService,
|
|
5
|
+
ServerError,
|
|
6
|
+
Terms,
|
|
7
|
+
TermsResponse,
|
|
8
|
+
TermsResult,
|
|
9
|
+
TimeoutError,
|
|
10
|
+
} from '../query.js';
|
|
11
|
+
import { Term } from '../terms.js';
|
|
12
|
+
import { clientQueriesCounter } from '../instrumentation.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Outcome of looking up a single term IRI.
 */
export type LookupQueryResult = {
  /** The IRI that was requested. */
  uri: string;
  /** The distribution used for the lookup, or an error when no source matches the IRI. */
  distribution: SourceResult;
  /** The term that was found, or the reason no term could be returned. */
  result: LookupResult;

  /** Response time of the source query in milliseconds (0 when no source was queried). */
  responseTimeMs: number;
};

/** The source of a lookup: a distribution, or an error when the catalog has none for the IRI. */
export type SourceResult = Distribution | SourceNotFoundError;

/** What a lookup of one IRI can produce. */
export type LookupResult = Term | NotFoundError | TimeoutError | ServerError;
|
|
25
|
+
|
|
26
|
+
/**
 * Reported when no source is known that can provide the term with the given IRI.
 */
export class SourceNotFoundError {
  /** The IRI for which no source was found. */
  readonly iri: string;
  /** Human-readable description of the failure. */
  readonly message: string;

  constructor(iri: string) {
    this.iri = iri;
    this.message = `No source found that can provide term with URI ${iri}`;
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Reported when a source was queried but returned no term with the given IRI.
 */
export class NotFoundError {
  /** The IRI that could not be found. */
  readonly iri: string;
  /** Human-readable description of the failure. */
  readonly message: string;

  constructor(iri: string) {
    this.iri = iri;
    this.message = `No term found with URI ${iri}`;
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Looks up terms by IRI in the sources that the catalog associates with them.
 */
export class LookupService {
  constructor(
    private catalog: Catalog,
    private queryService: QueryTermsService,
  ) {}

  /**
   * Look up the terms identified by `iris`.
   *
   * Each IRI is resolved to a dataset through the catalog; the first
   * distribution of every dataset involved is queried once, in parallel.
   *
   * @param iris IRIs of the terms to look up.
   * @param timeoutMs Timeout in milliseconds, passed on to every source query.
   * @returns One result per requested IRI, in the same order as `iris`. IRIs
   *   for which the catalog has no dataset get a `SourceNotFoundError`
   *   distribution and a `NotFoundError` result.
   */
  public async lookup(
    iris: string[],
    timeoutMs: number,
  ): Promise<LookupQueryResult[]> {
    // Resolve every IRI to its dataset; IRIs unknown to the catalog are left out.
    const irisToDataset = new Map<string, Dataset>();
    for (const iri of iris) {
      const dataset = this.catalog.getDatasetByTermIri(iri);
      if (dataset) {
        irisToDataset.set(iri.toString(), dataset);
      }
    }

    // Group the IRIs per dataset so each dataset is queried only once.
    const datasetToIris = new Map<Dataset, IRI[]>();
    for (const [iri, dataset] of irisToDataset) {
      datasetToIris.set(dataset, [...(datasetToIris.get(dataset) ?? []), iri]);
    }

    // NOTE(review): every distribution receives the full list of IRIs, not only
    // the IRIs grouped under its dataset. Left unchanged because a source can
    // return terms that belong to another dataset (see datasetIri below) —
    // confirm whether this is intentional.
    const lookups = [...datasetToIris.keys()].map((dataset) =>
      this.queryService.lookup(iris, dataset.distributions[0], timeoutMs),
    );

    const termsPerSource: TermsResponse[] = await Promise.all(lookups);

    const datasetToTerms = new Map<Dataset, TermsResponse>();
    for (const response of termsPerSource) {
      let dataset = this.catalog.getDatasetByDistributionIri(
        response.result.distribution.iri,
      )!;

      if (!(response.result instanceof Terms)) {
        // Timeouts and server errors are stored as-is for the queried dataset.
        datasetToTerms.set(dataset, response);
        continue;
      }

      // Continue an earlier Terms result for this dataset if there is one.
      // An earlier error result is not reused: it has no `terms` to append to.
      const previous = datasetToTerms.get(dataset)?.result;
      const termsResult =
        previous instanceof Terms
          ? previous
          : new Terms(response.result.distribution, []);

      for (const term of response.result.terms) {
        if (term.datasetIri !== undefined) {
          const termsDataset = this.catalog.getDatasetByIri(
            term.datasetIri.value,
          );
          if (termsDataset !== undefined) {
            // The term names its own dataset: attribute the term (and the
            // response) to that dataset instead of the one that was queried.
            dataset = termsDataset;
            irisToDataset.set(term.id.value, dataset);
          }
        }
        termsResult.terms.push(term);
      }

      datasetToTerms.set(
        dataset,
        new TermsResponse(termsResult, response.responseTimeMs),
      );
    }

    return iris.map((iri) => {
      const dataset = irisToDataset.get(iri.toString());
      if (dataset === undefined) {
        clientQueriesCounter.add(1, {
          type: 'lookup',
          error: 'SourceNotFound',
        });
        return {
          uri: iri,
          distribution: new SourceNotFoundError(iri),
          result: new NotFoundError(iri),
          responseTimeMs: 0,
        };
      }

      const response = datasetToTerms.get(dataset);
      clientQueriesCounter.add(1, { type: 'lookup' });

      if (response === undefined) {
        // The response was stored under another dataset (re-keyed through a
        // term's datasetIri), so nothing is registered for this dataset.
        // Report the term as not found instead of dereferencing undefined.
        return {
          uri: iri,
          distribution: dataset.distributions[0],
          result: new NotFoundError(iri),
          responseTimeMs: 0,
        };
      }

      return {
        uri: iri,
        distribution: dataset.distributions[0],
        result: result(response.result, iri),
        responseTimeMs: response.responseTimeMs,
      };
    });
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Pick the lookup result for one IRI out of a source's response.
 *
 * @param result The response of the source: terms or an error.
 * @param iri The IRI of the term that was asked for.
 * @returns The error when the source failed, the term whose id equals `iri`
 *   when the source returned it, and a `NotFoundError` otherwise.
 */
function result(result: TermsResult, iri: string): LookupResult {
  if (!(result instanceof Error)) {
    const wanted = iri.toString();
    for (const term of result.terms) {
      if (term.id.value === wanted) {
        return term;
      }
    }
    return new NotFoundError(iri);
  }

  return result;
}
|
package/src/query.ts
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
import * as Hoek from '@hapi/hoek';
|
|
2
|
+
import Joi from 'joi';
|
|
3
|
+
import { LoggerPino } from './helpers/logger-pino.js';
|
|
4
|
+
import Pino from 'pino';
|
|
5
|
+
import PrettyMilliseconds from 'pretty-ms';
|
|
6
|
+
import * as RDF from '@rdfjs/types';
|
|
7
|
+
import { Term, TermsTransformer } from './terms.js';
|
|
8
|
+
import { QueryMode, queryVariants } from './search/query-mode.js';
|
|
9
|
+
import { Dataset, Distribution, IRI } from './catalog.js';
|
|
10
|
+
import { QueryEngine } from '@comunica/query-sparql';
|
|
11
|
+
import { BindingsFactory } from '@comunica/utils-bindings-factory';
|
|
12
|
+
import { DataFactory } from 'rdf-data-factory';
|
|
13
|
+
import { sourceQueriesHistogram } from './instrumentation.js';
|
|
14
|
+
import { config } from './config.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Tell whether bindings must be substituted into the query text instead of
 * being passed to Comunica as initialBindings.
 *
 * Workaround for a Comunica v5 (traqula) bug that crashes on queries with:
 * - SERVICE clauses;
 * - some VALUES combinations.
 *
 * The check is a case-insensitive whole-word match for either keyword
 * anywhere in the query text.
 */
function requiresStringSubstitution(query: string): boolean {
  return /\b(?:SERVICE|VALUES)\b/i.test(query);
}
|
|
27
|
+
|
|
28
|
+
/**
 * Substitute bindings directly into a SPARQL query string.
 * This is a workaround for Comunica v5's initialBindings bug with SERVICE clauses.
 *
 * Every `?name` occurrence (whole variable names only) whose name is a key of
 * `bindings` is replaced by the SPARQL serialization of the bound term:
 * - named nodes become `<iri>`;
 * - literals become `"value"`, `"value"@lang` or `"value"^^<datatype>`.
 * Bindings of any other term type are left in the query as variables.
 *
 * Values are escaped so that they cannot terminate the literal or IRI they are
 * placed in, and all variables are replaced in a single pass so that text
 * inside an already substituted value is never substituted again.
 *
 * @param query The SPARQL query text.
 * @param bindings Terms keyed by variable name (without the leading `?`).
 * @returns The query text with the bound variables replaced.
 */
function substituteBindings(
  query: string,
  bindings: Record<string, RDF.Term>,
): string {
  const names = Object.keys(bindings);
  if (names.length === 0) {
    return query;
  }

  const escapeRegExp = (text: string) =>
    text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Characters that may not appear unescaped in a double-quoted SPARQL string.
  const escapeLiteral = (value: string) =>
    value
      .replace(/\\/g, '\\\\')
      .replace(/"/g, '\\"')
      .replace(/\n/g, '\\n')
      .replace(/\r/g, '\\r');

  // Characters that the SPARQL grammar forbids inside <...> are percent-encoded.
  const escapeIri = (value: string) =>
    // eslint-disable-next-line no-control-regex
    value.replace(
      /[\u0000-\u0020<>"{}|^`\\]/g,
      (char) =>
        `%${char.charCodeAt(0).toString(16).toUpperCase().padStart(2, '0')}`,
    );

  const serialize = (term: RDF.Term): string | undefined => {
    if (term.termType === 'NamedNode') {
      return `<${escapeIri(term.value)}>`;
    }
    if (term.termType === 'Literal') {
      const literal = term as RDF.Literal;
      const lexical = `"${escapeLiteral(literal.value)}"`;
      const datatype = literal.datatype?.value;
      if (
        datatype &&
        datatype !== 'http://www.w3.org/2001/XMLSchema#string' &&
        datatype !== 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString'
      ) {
        return `${lexical}^^<${escapeIri(datatype)}>`;
      }
      if (literal.language) {
        return `${lexical}@${literal.language}`;
      }
      return lexical;
    }
    return undefined;
  };

  const pattern = new RegExp(
    `\\?(${names.map(escapeRegExp).join('|')})\\b`,
    'g',
  );

  // A replacer function is used so that `$&`, `$1` etc. in a value are
  // inserted literally instead of being read as replacement patterns.
  return query.replace(pattern, (match: string, name: string) => {
    const term = bindings[name];
    const serialized = term === undefined ? undefined : serialize(term);
    return serialized ?? match;
  });
}
|
|
59
|
+
|
|
60
|
+
/** What a source query can produce: terms, or one of the error types. */
export type TermsResult = Terms | TimeoutError | ServerError;

/**
 * Pairs the outcome of a source query with the time the query took.
 */
export class TermsResponse {
  /** The terms that were found, or the error that occurred. */
  readonly result: TermsResult;
  /** Response time in milliseconds. */
  readonly responseTimeMs: number;

  constructor(result: TermsResult, responseTimeMs: number) {
    this.result = result;
    this.responseTimeMs = responseTimeMs;
  }
}
|
|
68
|
+
|
|
69
|
+
/**
 * The terms returned by one distribution.
 */
export class Terms {
  /** The distribution that was queried. */
  readonly distribution: Distribution;
  /** The terms found in that distribution. */
  readonly terms: Term[];

  constructor(distribution: Distribution, terms: Term[]) {
    this.distribution = distribution;
    this.terms = terms;
  }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Base class for failed source queries.
 *
 * Note that this class shadows the global `Error` inside this module and does
 * not extend it.
 */
export class Error {
  /** The distribution that was being queried. */
  readonly distribution: Distribution;
  /** Human-readable description of the failure. */
  readonly message: string;

  constructor(distribution: Distribution, message: string) {
    this.distribution = distribution;
    this.message = message;
  }
}

/**
 * The source did not respond within the timeout.
 */
export class TimeoutError extends Error {
  constructor(
    override readonly distribution: Distribution,
    timeoutMs: number,
  ) {
    const description = `Source timed out after ${timeoutMs}ms`;
    super(distribution, description);
  }
}

/**
 * The source responded with an error, or could not be queried.
 */
export class ServerError extends Error {}
|
|
93
|
+
|
|
94
|
+
/**
 * Percent-encode the characters that the SPARQL grammar forbids inside `<...>`,
 * so that an IRI cannot terminate the IRI reference it is placed in.
 */
const encodeIllegalIriCharacters = (iri: string) =>
  // eslint-disable-next-line no-control-regex
  iri.replace(
    /[\u0000-\u0020<>"{}|^`\\]/g,
    (char) =>
      `%${char.charCodeAt(0).toString(16).toUpperCase().padStart(2, '0')}`,
  );

/**
 * Runs search and lookup queries against the SPARQL endpoint of a distribution
 * and turns the resulting quads into terms.
 */
export class QueryTermsService {
  private readonly logger: Pino.Logger;
  private readonly engine: QueryEngine;

  /**
   * @param options Optional Comunica query engine and Pino logger; a new
   *   instance of each is created when not provided.
   */
  constructor(options: { comunica?: QueryEngine; logger?: Pino.Logger } = {}) {
    this.engine = options.comunica ?? new QueryEngine();
    this.logger = options.logger ?? Pino.pino();
  }

  /**
   * Parameterize the SPARQL query’s limit in two ways:
   * - as a pre-bound variable ?limit (for GraphDB’s luc:limit, Wikidata and text:query);
   * - by replacing the #LIMIT# placeholder (for LIMIT 123).
   *
   * Only the first #LIMIT# placeholder in the query is replaced.
   */
  parameterizeLimit(args: {
    query: string;
    bindings: Record<string, RDF.Term>;
    limit: number;
  }): { queryWithLimit: string; bindingsWithLimit: Record<string, RDF.Term> } {
    return {
      queryWithLimit: args.query.replace('#LIMIT#', `LIMIT ${args.limit}`),
      bindingsWithLimit: {
        ...args.bindings,
        limit: dataFactory.literal(
          args.limit.toString(),
          dataFactory.namedNode('http://www.w3.org/2001/XMLSchema#integer'),
        ),
      },
    };
  }

  /**
   * Search a distribution for terms matching `searchQuery`.
   *
   * The query variants for `queryMode`, the dataset IRI (`?datasetUri`) and
   * the limit are bound into the distribution's search query.
   */
  async search(
    searchQuery: string,
    queryMode: QueryMode,
    dataset: Dataset,
    distribution: Distribution,
    limit: number,
    timeoutMs: number,
  ): Promise<TermsResponse> {
    const bindings: Record<string, RDF.Term> = {};
    for (const [name, value] of queryVariants(searchQuery, queryMode)) {
      bindings[name] = dataFactory.literal(value);
    }
    bindings['datasetUri'] = dataFactory.namedNode(dataset.iri.toString());

    const { queryWithLimit, bindingsWithLimit } = this.parameterizeLimit({
      query: distribution.searchQuery,
      bindings,
      limit,
    });

    return this.run(
      // For plain SPARQL LIMIT (LIMIT 123) that cannot be pre-bound
      queryWithLimit,
      distribution,
      timeoutMs,
      bindingsWithLimit,
    );
  }

  /**
   * Look up terms by IRI in a distribution by replacing the `?uris`
   * placeholder of its lookup query with the IRIs as `<...>` references.
   */
  async lookup(iris: IRI[], distribution: Distribution, timeoutMs: number) {
    const uris = iris
      .map((iri) => `<${encodeIllegalIriCharacters(`${iri}`)}>`)
      .join(' ');

    return this.run(
      // A replacer function keeps `$` sequences in the IRIs from being
      // interpreted as replacement patterns.
      distribution.lookupQuery.replace('?uris', () => uris),
      distribution,
      timeoutMs,
    );
  }

  /**
   * Run a query against the distribution's endpoint.
   *
   * @returns A response that carries the terms, or a TimeoutError/ServerError
   *   when the quad stream emits an error.
   * @throws When `timeoutMs` is not an integer between 1 and
   *   `config.MAX_QUERY_TIMEOUT`.
   */
  async run(
    query: string,
    distribution: Distribution,
    timeoutMs: number,
    bindings: Record<string, RDF.Term> = {},
  ): Promise<TermsResponse> {
    // Validation only: the validated value (and therefore the default) is not used.
    Joi.attempt(
      timeoutMs,
      Joi.number()
        .integer()
        .min(1)
        .max(config.MAX_QUERY_TIMEOUT)
        .default(config.DEFAULT_QUERY_TIMEOUT),
    );

    const timer = new Hoek.Bench();
    const logger = new LoggerPino({ logger: this.logger });
    // Extract HTTP credentials if the distribution URL contains any.
    const url = new URL(distribution.endpoint.toString());
    // Never write the credentials embedded in the endpoint URL to the log.
    const endpointForLog = obfuscateHttpCredentials(url.toString());

    // Workaround for https://github.com/comunica/comunica/issues/1655, so use
    // string substitution instead of initialBindings for:
    // - SERVICE clauses crash with initialBindings
    // - VALUES crashes in some combinations
    const useStringSubstitution = requiresStringSubstitution(query);
    const finalQuery = useStringSubstitution
      ? substituteBindings(query, bindings)
      : query;

    this.logger.info(`Querying "${endpointForLog}" with "${finalQuery}"...`);
    const quadStream = await this.engine.queryQuads(finalQuery, {
      log: logger,
      httpAuth:
        url.username === '' ? undefined : url.username + ':' + url.password,
      httpTimeout: timeoutMs,
      noCache: true,
      sources: [
        {
          type: 'sparql',
          value: url.origin + url.pathname,
        },
      ],
      // Only pass initialBindings when NOT using string substitution
      ...(useStringSubstitution
        ? {}
        : { initialBindings: bindingsFactory.fromRecord(bindings) }),
    });

    return new Promise((resolve) => {
      const termsTransformer = new TermsTransformer();
      quadStream.on('error', (error) => {
        const elapsed = Math.round(timer.elapsed());
        // NOTE(review): the error object passed for %o is logged unmodified.
        this.logger.error(
          `An error occurred when querying "${endpointForLog}": ${obfuscateHttpCredentials(String(error))} with %o`,
          error,
        );

        const timedOut = error.message.startsWith('Fetch timed out');
        sourceQueriesHistogram.record(elapsed, {
          distribution: distribution.iri.toString(),
          error: timedOut ? 'TimeoutError' : 'ServerError',
        });
        resolve(
          new TermsResponse(
            timedOut
              ? new TimeoutError(distribution, timeoutMs)
              : new ServerError(
                  distribution,
                  obfuscateHttpCredentials(error.message),
                ),
            elapsed,
          ),
        );
      });
      quadStream.on('data', (quad: RDF.Quad) => {
        termsTransformer.fromQuad(quad);
      });
      quadStream.on('end', () => {
        const terms = termsTransformer
          .asArray()
          .sort(byScoreThenAlphabetically);
        // Measure once so the log, the metric and the response agree.
        const elapsed = timer.elapsed();
        this.logger.info(
          `Found ${terms.length} terms matching "${query}" in "${endpointForLog}" in ${PrettyMilliseconds(elapsed)}`,
        );
        sourceQueriesHistogram.record(Math.round(elapsed), {
          distribution: distribution.iri.toString(),
        });
        resolve(
          new TermsResponse(
            new Terms(distribution, terms),
            Math.round(elapsed),
          ),
        );
      });
    });
  }
}
|
|
277
|
+
|
|
278
|
+
/**
 * Read a term's score as a number. A missing score, or a score that cannot be
 * parsed as a number, counts as 0 so that comparisons never involve NaN.
 */
const numericScore = (term: Term) => {
  const score = parseFloat(term.score?.value ?? '0');
  return Number.isNaN(score) ? 0 : score;
};

/**
 * Sort comparator: terms with a higher score come first; terms with equal
 * scores are ordered alphabetically by their labels.
 */
const byScoreThenAlphabetically = (a: Term, b: Term) => {
  const scoreA = numericScore(a);
  const scoreB = numericScore(b);
  if (scoreA === scoreB) {
    return alphabeticallyByLabels(a, b);
  } else {
    return scoreA < scoreB ? 1 : -1;
  }
};

/**
 * Sort comparator on the first preferred label followed by the first
 * alternative label (each an empty string when absent), compared with
 * `localeCompare`.
 */
const alphabeticallyByLabels = (a: Term, b: Term) => {
  const prefLabelA = a.prefLabels[0]?.value ?? '';
  const altLabelA = a.altLabels[0]?.value ?? '';
  const sortLabelA = prefLabelA + altLabelA;
  const prefLabelB = b.prefLabels[0]?.value ?? '';
  const altLabelB = b.altLabels[0]?.value ?? '';
  const sortLabelB = prefLabelB + altLabelB;
  return sortLabelA.localeCompare(sortLabelB);
};
|
|
297
|
+
|
|
298
|
+
const dataFactory = new DataFactory();
|
|
299
|
+
const bindingsFactory = new BindingsFactory(dataFactory);
|
|
300
|
+
|
|
301
|
+
/**
 * Replace the user info (`user:password@` or `user@`) of every http(s) URL in
 * a message with `***@`.
 *
 * The user info is matched up to the first `/`, `@` or whitespace only, so
 * text that merely contains a port number and a later `@` is left intact.
 */
const obfuscateHttpCredentials = (message: string) =>
  message.replace(/(https?):\/\/[^\s/@:]+(?::[^\s/@]*)?@/g, '$1://***@');
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * How a search string is turned into query bindings.
 */
export enum QueryMode {
  /** Pass the search string on unchanged. */
  RAW = 'raw',
  /** Normalize the search string and build a Virtuoso-specific variant. */
  OPTIMIZED = 'optimized',
}

/**
 * Build the variants of a search string that are bound into search queries.
 *
 * @param query The search string.
 * @param type The query mode.
 * @returns A map with the keys `query` and `virtuosoQuery`. In RAW mode both
 *   hold the unchanged search string; in OPTIMIZED mode `query` holds the
 *   normalized string and `virtuosoQuery` the Virtuoso variant of it.
 */
export function queryVariants(query: string, type: QueryMode) {
  switch (type) {
    case QueryMode.OPTIMIZED: {
      const normalized = stringQuery(query);
      return new Map([
        ['query', normalized],
        ['virtuosoQuery', virtuosoQuery(normalized)],
      ]);
    }
    case QueryMode.RAW:
      return new Map([
        ['query', query],
        ['virtuosoQuery', query],
      ]);
  }
}
|
|
20
|
+
|
|
21
|
+
/**
 * Normalize a search string: lower-case it, collapse runs of whitespace into
 * a single space and trim it.
 */
const stringQuery = (query: string) =>
  query.toLowerCase().replace(/\s+/g, ' ').trim();

/**
 * Turn a search string into a Virtuoso text query: single quotes are escaped,
 * `&` parts are dropped, every part that is not a boolean operator is quoted
 * and the parts are connected with AND where no operator is given.
 *
 * Returns an empty string when no parts remain (e.g. for the query `&`).
 */
const virtuosoQuery = (query: string) =>
  join(quote(filterStopWords(split(escape(query)))));

/** Escape single quotes with a backslash. */
const escape = (query: string) => query.replace(/'/g, "\\'");

/** Split on runs of whitespace. */
const split = (query: string) => query.split(/\s+/);

/**
 * Quote parts that are not boolean operators.
 */
const quote = (queryParts: string[]) =>
  queryParts.map((part) => (isBooleanOperator(part) ? part : `'${part}'`));

/** Drop the parts that consist of `&` only. */
const filterStopWords = (queryParts: string[]) =>
  queryParts.filter((part) => part !== '&');

/**
 * Join query parts with boolean AND if they are not yet connected with a boolean.
 */
const join = (queryParts: string[]) => {
  // reduce() without an initial value throws on an empty array, which happens
  // when all parts were filtered out.
  if (queryParts.length === 0) {
    return '';
  }

  return queryParts.reduce((previousValue, currentValue, currentIndex, array) => {
    const previous = array[currentIndex - 1];
    if (!isBooleanOperator(previous) && !isBooleanOperator(currentValue)) {
      return `${previousValue} AND ${currentValue}`;
    }

    return `${previousValue} ${currentValue}`;
  });
};

/** Whether a part is the boolean operator `and` or `or` (case-insensitive). */
const isBooleanOperator = (maybeBool: string) =>
  maybeBool.toLowerCase() === 'and' || maybeBool.toLowerCase() === 'or';
|
package/src/terms.ts
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import type RDF from '@rdfjs/types';
|
|
2
|
+
|
|
3
|
+
/**
 * A term as returned to clients, built from the quads of a query result.
 */
export class Term {
  constructor(
    /** Identifier of the term. */
    readonly id: RDF.Term,
    /** Value of rdf:type, if any. */
    readonly type: RDF.Term | undefined,
    /** Preferred labels. */
    readonly prefLabels: RDF.Literal[],
    /** Alternative labels. */
    readonly altLabels: RDF.Literal[],
    /** Hidden labels. */
    readonly hiddenLabels: RDF.Literal[],
    /** Scope notes. */
    readonly scopeNotes: RDF.Literal[],
    /** rdfs:seeAlso references. */
    readonly seeAlso: RDF.NamedNode[],
    /** Broader terms. */
    readonly broaderTerms: RelatedTerm[],
    /** Narrower terms. */
    readonly narrowerTerms: RelatedTerm[],
    /** Related terms. */
    readonly relatedTerms: RelatedTerm[],
    /** Exact matches. */
    readonly exactMatches: RelatedTerm[],
    /** IRI of the dataset the term belongs to, if known. */
    readonly datasetIri: RDF.Term | undefined,
    /** Relevance score, if the source provided one. */
    readonly score: RDF.Literal | undefined,
  ) {}
}
|
|
20
|
+
|
|
21
|
+
/**
 * A term that another term refers to, reduced to its identifier and
 * preferred labels.
 */
export class RelatedTerm {
  /** Identifier of the related term. */
  readonly id: RDF.Term;
  /** Preferred labels of the related term. */
  readonly prefLabels: RDF.Literal[];

  constructor(id: RDF.Term, prefLabels: RDF.Literal[]) {
    this.id = id;
    this.prefLabels = prefLabels;
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Mutable accumulator for the quads that share one subject. Multi-valued
 * properties start out as empty arrays, single-valued ones as undefined.
 */
class SparqlResultTerm {
  readonly id: RDF.Term;
  type: RDF.Term | undefined;
  prefLabels: RDF.Literal[];
  altLabels: RDF.Literal[];
  hiddenLabels: RDF.Literal[];
  scopeNotes: RDF.Literal[];
  seeAlso: RDF.NamedNode[];
  broaderTerms: RDF.Term[];
  narrowerTerms: RDF.Term[];
  relatedTerms: RDF.Term[];
  exactMatches: RDF.Term[];
  inScheme: RDF.Term | undefined;
  score: RDF.Literal | undefined;

  constructor(id: RDF.Term) {
    this.id = id;
    this.type = undefined;
    this.prefLabels = [];
    this.altLabels = [];
    this.hiddenLabels = [];
    this.scopeNotes = [];
    this.seeAlso = [];
    this.broaderTerms = [];
    this.narrowerTerms = [];
    this.relatedTerms = [];
    this.exactMatches = [];
    this.inScheme = undefined;
    this.score = undefined;
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Collects quads and turns them into terms.
 *
 * Quads are grouped by subject; subjects typed as skos:Concept are the terms
 * that `asArray()` returns.
 */
export class TermsTransformer {
  private termsIris: Set<string> = new Set();
  private termsMap: Map<string, SparqlResultTerm> = new Map();
  // Maps predicate IRIs to the SparqlResultTerm property that stores their objects.
  private readonly predicateToPropertyMap = new Map<string, string>([
    ['http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'type'],
    ['http://www.w3.org/2000/01/rdf-schema#seeAlso', 'seeAlso'],
    ['http://www.w3.org/2004/02/skos/core#prefLabel', 'prefLabels'],
    ['http://www.w3.org/2008/05/skos#prefLabel', 'prefLabels'],
    ['http://www.w3.org/2004/02/skos/core#altLabel', 'altLabels'],
    ['http://www.w3.org/2008/05/skos#altLabel', 'altLabels'],
    ['http://www.w3.org/2004/02/skos/core#hiddenLabel', 'hiddenLabels'],
    ['http://www.w3.org/2008/05/skos#hiddenLabel', 'hiddenLabels'],
    ['http://www.w3.org/2004/02/skos/core#scopeNote', 'scopeNotes'],
    ['http://www.w3.org/2008/05/skos#scopeNote', 'scopeNotes'],
    ['http://www.w3.org/2004/02/skos/core#broader', 'broaderTerms'],
    ['http://www.w3.org/2008/05/skos#broader', 'broaderTerms'],
    ['http://www.w3.org/2004/02/skos/core#narrower', 'narrowerTerms'],
    ['http://www.w3.org/2008/05/skos#narrower', 'narrowerTerms'],
    ['http://www.w3.org/2004/02/skos/core#related', 'relatedTerms'],
    ['http://www.w3.org/2008/05/skos#related', 'relatedTerms'],
    ['http://www.w3.org/2004/02/skos/core#exactMatch', 'exactMatches'],
    ['http://www.w3.org/2008/05/skos#exactMatch', 'exactMatches'],
    ['http://www.w3.org/2004/02/skos/core#inScheme', 'inScheme'],
    ['http://purl.org/voc/vrank#simpleRank', 'score'],
  ]);

  /**
   * Record one quad. The subject is registered even when the predicate is
   * not one of the known predicates; only known predicates store a value.
   */
  fromQuad(quad: RDF.Quad): void {
    const { subject, predicate, object } = quad;
    const key = subject.value;

    let current = this.termsMap.get(key);
    if (current === undefined) {
      current = new SparqlResultTerm(subject);
      this.termsMap.set(key, current);
    }

    const propertyName = this.predicateToPropertyMap.get(predicate.value);
    if (propertyName === undefined) {
      return;
    }

    // skos:Concepts are the top-level search results, which we track in termsIris.
    const isConcept =
      object.value === 'http://www.w3.org/2004/02/skos/core#Concept' ||
      object.value === 'http://www.w3.org/2008/05/skos#Concept';
    if (propertyName === 'type' && isConcept) {
      this.termsIris.add(key);
    }

    const slot = (current as any)[propertyName]; // eslint-disable-line @typescript-eslint/no-explicit-any
    if (!Array.isArray(slot)) {
      // Single-valued property: the most recent quad wins.
      (current as any)[propertyName] = object; // eslint-disable-line @typescript-eslint/no-explicit-any
      return;
    }

    // Prevent duplicate values due to duplicate quads.
    if (!slot.some((value) => value.equals(object))) {
      slot.push(object);
    }
  }

  /**
   * Build the terms for all subjects that were seen as skos:Concept, with
   * their related terms sorted alphabetically by preferred label.
   */
  asArray(): Term[] {
    const terms: Term[] = [];
    for (const iri of this.termsIris) {
      const collected = this.termsMap.get(iri)!;
      const sortedRelated = (related: RDF.Term[]) =>
        this.mapRelatedTerms(related).sort(alphabeticallyByPrefLabel);

      terms.push(
        new Term(
          collected.id,
          collected.type,
          collected.prefLabels,
          collected.altLabels,
          collected.hiddenLabels,
          collected.scopeNotes,
          collected.seeAlso,
          sortedRelated(collected.broaderTerms),
          sortedRelated(collected.narrowerTerms),
          sortedRelated(collected.relatedTerms),
          sortedRelated(collected.exactMatches),
          collected.inScheme,
          collected.score,
        ),
      );
    }
    return terms;
  }

  /**
   * Map related IRIs to related terms. Every IRI yields a related term; its
   * preferred labels are taken from the quads collected for that IRI and are
   * empty when none were collected.
   *
   * Related terms can be incomplete because of the SPARQL query limit (see
   * https://github.com/netwerk-digitaal-erfgoed/network-of-terms/issues/36).
   */
  private mapRelatedTerms = (terms: RDF.Term[]) =>
    terms.map(
      (iri) =>
        new RelatedTerm(iri, this.termsMap.get(iri.value)?.prefLabels ?? []),
    );
}
|
|
136
|
+
|
|
137
|
+
/**
 * Sort comparator on the first preferred label of two related terms, compared
 * with `localeCompare`; a term without preferred labels sorts as an empty string.
 */
const alphabeticallyByPrefLabel = (a: RelatedTerm, b: RelatedTerm) =>
  (a.prefLabels[0]?.value ?? '').localeCompare(b.prefLabels[0]?.value ?? '');
|