@lde/sparql-qlever 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/dist/createQlever.d.ts +5 -4
- package/dist/createQlever.d.ts.map +1 -1
- package/dist/createQlever.js +2 -2
- package/dist/importer.d.ts.map +1 -1
- package/dist/importer.js +53 -8
- package/dist/server.d.ts +9 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +8 -4
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -9,3 +9,41 @@ Building a QLever index is slow. To avoid rebuilding it on every pipeline run, t
|
|
|
9
9
|
Only **one** index is cached at a time. In a multi-dataset pipeline, each dataset overwrites the previous index. On re-run, the last-indexed dataset gets a cache hit while the others rebuild.
|
|
10
10
|
|
|
11
11
|
Caching is enabled by default. Disable it by passing `cacheIndex: false` to `createQlever()` or the `Importer` constructor (e.g. driven by a `QLEVER_CACHE_INDEX=false` environment variable).
|
|
12
|
+
|
|
13
|
+
## Configuration
|
|
14
|
+
|
|
15
|
+
`createQlever()` accepts `indexOptions` and `serverOptions` to tune QLever's index builder and server respectively.
|
|
16
|
+
|
|
17
|
+
### Server options (`serverOptions`)
|
|
18
|
+
|
|
19
|
+
Passed to `qlever-server` at startup.
|
|
20
|
+
|
|
21
|
+
| Option | Description | Default |
|
|
22
|
+
| ----------------------- | ------------------------------------------------ | ------- |
|
|
23
|
+
| `memory-max-size` | Maximum memory for query processing and caching. | `'4G'` |
|
|
24
|
+
| `default-query-timeout` | Default query timeout. | `'30s'` |
|
|
25
|
+
|
|
26
|
+
Example:
|
|
27
|
+
|
|
28
|
+
```ts
|
|
29
|
+
const { importer, server } = createQlever({
|
|
30
|
+
mode: 'docker',
|
|
31
|
+
image: 'adfreiburg/qlever:latest',
|
|
32
|
+
serverOptions: {
|
|
33
|
+
'memory-max-size': '12G',
|
|
34
|
+
'default-query-timeout': '120s',
|
|
35
|
+
},
|
|
36
|
+
});
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Index options (`indexOptions`)
|
|
40
|
+
|
|
41
|
+
Passed to `qlever-index` during import.
|
|
42
|
+
|
|
43
|
+
| Option | Description | Default |
|
|
44
|
+
| ------------------------------- | --------------------------------------------------------------------------------------------- | ----------- |
|
|
45
|
+
| `ascii-prefixes-only` | Enable faster parsing for well-behaved TTL files. | `true` |
|
|
46
|
+
| `num-triples-per-batch` | Triples per batch; lower values reduce memory usage. | `3_000_000` |
|
|
47
|
+
| `stxxl-memory` | Memory budget for sorting during the index build. | `'10G'` |
|
|
48
|
+
| `parse-parallel` | Parse input in parallel. | `true` |
|
|
49
|
+
| `only-pso-and-pos-permutations` | Build only PSO and POS permutations. Faster, but queries with predicate variables won't work. | `false` |
|
package/dist/createQlever.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Downloader } from '@lde/distribution-downloader';
|
|
2
2
|
import { Importer, QleverIndexOptions } from './importer.js';
|
|
3
|
-
import { Server } from './server.js';
|
|
3
|
+
import { QleverServerOptions, Server } from './server.js';
|
|
4
4
|
export type QleverOptions = {
|
|
5
5
|
/** Directory where downloaded data files are stored. */
|
|
6
6
|
dataDir?: string;
|
|
@@ -10,9 +10,10 @@ export type QleverOptions = {
|
|
|
10
10
|
downloader?: Downloader;
|
|
11
11
|
/** Cache QLever indices and skip re-indexing when source data is unchanged. @default true */
|
|
12
12
|
cacheIndex?: boolean;
|
|
13
|
-
/**
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
/** Options for `qlever-index` (index building). */
|
|
14
|
+
indexOptions?: QleverIndexOptions;
|
|
15
|
+
/** Options for `qlever-server` (query processing). */
|
|
16
|
+
serverOptions?: QleverServerOptions;
|
|
16
17
|
} & ({
|
|
17
18
|
mode: 'docker';
|
|
18
19
|
image: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAC7D,OAAO,EAAE,mBAAmB,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1D,MAAM,MAAM,aAAa,GAAG;IAC1B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,6FAA6F;IAC7F,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,mDAAmD;IACnD,YAAY,CAAC,EAAE,kBAAkB,CAAC;IAClC,sDAAsD;IACtD,aAAa,CAAC,EAAE,mBAAmB,CAAC;CACrC,GAAG,CACA;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CACrB,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa;;;EA4BlD"}
|
package/dist/createQlever.js
CHANGED
|
@@ -25,13 +25,13 @@ export function createQlever(options) {
|
|
|
25
25
|
indexName: options.indexName,
|
|
26
26
|
downloader: options.downloader ?? new LastModifiedDownloader(options.dataDir),
|
|
27
27
|
cacheIndex: options.cacheIndex,
|
|
28
|
-
qleverOptions: options.
|
|
28
|
+
qleverOptions: options.indexOptions,
|
|
29
29
|
}),
|
|
30
30
|
server: new Server({
|
|
31
31
|
taskRunner,
|
|
32
32
|
indexName: options.indexName ?? 'data',
|
|
33
33
|
port,
|
|
34
|
-
|
|
34
|
+
qleverOptions: options.serverOptions,
|
|
35
35
|
}),
|
|
36
36
|
};
|
|
37
37
|
}
|
package/dist/importer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,eAAe,EACf,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAK5C,MAAM,WAAW,kBAAkB;IACjC,oBAAoB;IACpB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,yBAAyB;IACzB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,yJAAyJ;IACzJ,+BAA+B,CAAC,EAAE,OAAO,CAAC;CAC3C;AAED,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC5D,sBAAsB;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC;AAWD,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,qBAAqB;IAa7B,MAAM,CACjB,aAAa,EAAE,YAAY,EAAE,GAC5B,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YA+B5C,QAAQ;
|
|
1
|
+
{"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,eAAe,EACf,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAK5C,MAAM,WAAW,kBAAkB;IACjC,oBAAoB;IACpB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,yBAAyB;IACzB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,yJAAyJ;IACzJ,+BAA+B,CAAC,EAAE,OAAO,CAAC;CAC3C;AAED,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC5D,sBAAsB;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC;AAWD,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,qBAAqB;IAa7B,MAAM,CACjB,aAAa,EAAE,YAAY,EAAE,GAC5B,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YA+B5C,QAAQ;IAkDtB,OAAO,CAAC,gBAAgB;IAQxB,OAAO,CAAC,aAAa;IAIrB;;OAEG;YACW,eAAe;IA0B7B,yDAAyD;YAC3C,eAAe;YAaf,cAAc;YAKd,KAAK;CA0CpB"}
|
package/dist/importer.js
CHANGED
|
@@ -44,7 +44,7 @@ export class Importer {
|
|
|
44
44
|
return result;
|
|
45
45
|
}
|
|
46
46
|
async doImport(distribution) {
|
|
47
|
-
const localFile = await this.options.downloader.download(distribution);
|
|
47
|
+
const { path: localFile, headers } = await this.options.downloader.download(distribution);
|
|
48
48
|
if (await this.isIndexUpToDate(localFile)) {
|
|
49
49
|
const tripleCount = await this.readTripleCount(localFile);
|
|
50
50
|
if (tripleCount === 0) {
|
|
@@ -52,7 +52,7 @@ export class Importer {
|
|
|
52
52
|
}
|
|
53
53
|
return new ImportSuccessful(distribution, undefined, tripleCount);
|
|
54
54
|
}
|
|
55
|
-
const format =
|
|
55
|
+
const { format, warning } = fileFormatFor(distribution.mimeType, basename(localFile), headers.get('Content-Type') ?? undefined);
|
|
56
56
|
let logs;
|
|
57
57
|
try {
|
|
58
58
|
logs = await this.index(localFile, format);
|
|
@@ -71,7 +71,8 @@ export class Importer {
|
|
|
71
71
|
return new ImportFailed(distribution, 'Indexed 0 triples from distribution');
|
|
72
72
|
}
|
|
73
73
|
await this.writeCacheInfo(localFile);
|
|
74
|
-
|
|
74
|
+
const warnings = warning ? [warning] : [];
|
|
75
|
+
return new ImportSuccessful(distribution, undefined, tripleCount, warnings);
|
|
75
76
|
}
|
|
76
77
|
parseTripleCount(logs) {
|
|
77
78
|
// Extract num-triples.normal from the metadata JSON that the index
|
|
@@ -163,10 +164,54 @@ const defaultQleverIndexOptions = {
|
|
|
163
164
|
'parse-parallel': true,
|
|
164
165
|
'only-pso-and-pos-permutations': false,
|
|
165
166
|
};
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
167
|
+
const extensionFormats = new Map([
|
|
168
|
+
['.nt', 'nt'],
|
|
169
|
+
['.nq', 'nq'],
|
|
170
|
+
['.ttl', 'ttl'],
|
|
171
|
+
]);
|
|
172
|
+
const compressionTypes = new Set([
|
|
173
|
+
'application/gzip',
|
|
174
|
+
'application/x-gzip',
|
|
175
|
+
'application/octet-stream',
|
|
176
|
+
]);
|
|
177
|
+
/**
|
|
178
|
+
* Determine the QLever format flag for a distribution.
|
|
179
|
+
*
|
|
180
|
+
* Priority:
|
|
181
|
+
* 1. Server Content-Type (if it maps to a supported RDF format)
|
|
182
|
+
* 2. File extension (fallback when Content-Type is a compression type or missing)
|
|
183
|
+
* 3. Declared MIME type from the dataset registry (last resort)
|
|
184
|
+
*/
|
|
185
|
+
function fileFormatFor(declaredMimeType, filename, serverContentType) {
|
|
186
|
+
const declaredFormat = supportedFormats.get(declaredMimeType);
|
|
187
|
+
if (declaredFormat === undefined) {
|
|
188
|
+
throw new Error(`Unsupported media type: ${declaredMimeType}`);
|
|
189
|
+
}
|
|
190
|
+
// Try server Content-Type first (strip parameters like "; charset=utf-8").
|
|
191
|
+
if (serverContentType) {
|
|
192
|
+
const actualType = serverContentType.split(';')[0].trim();
|
|
193
|
+
if (!compressionTypes.has(actualType)) {
|
|
194
|
+
const serverFormat = supportedFormats.get(actualType);
|
|
195
|
+
if (serverFormat !== undefined && serverFormat !== declaredFormat) {
|
|
196
|
+
return {
|
|
197
|
+
format: serverFormat,
|
|
198
|
+
warning: `Server Content-Type ${actualType} does not match declared media type ${declaredMimeType}; using ${serverFormat} format`,
|
|
199
|
+
};
|
|
200
|
+
}
|
|
201
|
+
if (serverFormat !== undefined) {
|
|
202
|
+
return { format: serverFormat };
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
// Fall back to file extension.
|
|
207
|
+
const stripped = filename.replace(/\.(gz|bz2|xz|zst)$/i, '');
|
|
208
|
+
const extension = stripped.slice(stripped.lastIndexOf('.'));
|
|
209
|
+
const extensionFormat = extensionFormats.get(extension);
|
|
210
|
+
if (extensionFormat !== undefined && extensionFormat !== declaredFormat) {
|
|
211
|
+
return {
|
|
212
|
+
format: extensionFormat,
|
|
213
|
+
warning: `Declared media type ${declaredMimeType} does not match file extension ${extension}; using ${extensionFormat} format`,
|
|
214
|
+
};
|
|
170
215
|
}
|
|
171
|
-
return format;
|
|
216
|
+
return { format: declaredFormat };
|
|
172
217
|
}
|
package/dist/server.d.ts
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
import { SparqlServer } from '@lde/sparql-server';
|
|
2
2
|
import { TaskRunner } from '@lde/task-runner';
|
|
3
|
+
export interface QleverServerOptions {
|
|
4
|
+
/** Maximum memory for query processing and caching. @default '4G' */
|
|
5
|
+
'memory-max-size'?: string;
|
|
6
|
+
/** Default query timeout. @default '30s' */
|
|
7
|
+
'default-query-timeout'?: string;
|
|
8
|
+
}
|
|
3
9
|
export declare class Server<Task> implements SparqlServer {
|
|
4
10
|
private readonly taskRunner;
|
|
5
11
|
private readonly indexName;
|
|
6
12
|
private task?;
|
|
7
13
|
private readonly port;
|
|
8
|
-
private readonly
|
|
9
|
-
constructor({ taskRunner, indexName, port,
|
|
14
|
+
private readonly qleverOptions;
|
|
15
|
+
constructor({ taskRunner, indexName, port, qleverOptions }: Arguments<Task>);
|
|
10
16
|
start(): Promise<void>;
|
|
11
17
|
stop(): Promise<void>;
|
|
12
18
|
get queryEndpoint(): URL;
|
|
@@ -16,7 +22,6 @@ export interface Arguments<Task> {
|
|
|
16
22
|
indexName: string;
|
|
17
23
|
/** @default 7001 */
|
|
18
24
|
port?: number;
|
|
19
|
-
|
|
20
|
-
queryTimeout?: string;
|
|
25
|
+
qleverOptions?: QleverServerOptions;
|
|
21
26
|
}
|
|
22
27
|
//# sourceMappingURL=server.d.ts.map
|
package/dist/server.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAG9C,qBAAa,MAAM,CAAC,IAAI,CAAE,YAAW,YAAY;IAC/C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAmB;IAC9C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,IAAI,CAAC,CAAO;IACpB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAG9C,MAAM,WAAW,mBAAmB;IAClC,qEAAqE;IACrE,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,4CAA4C;IAC5C,uBAAuB,CAAC,EAAE,MAAM,CAAC;CAClC;AAOD,qBAAa,MAAM,CAAC,IAAI,CAAE,YAAW,YAAY;IAC/C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAmB;IAC9C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,IAAI,CAAC,CAAO;IACpB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAgC;gBAElD,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,aAAa,EAAE,EAAE,SAAS,CAAC,IAAI,CAAC;IAO9D,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAStB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAOlC,IAAW,aAAa,IAAI,GAAG,CAE9B;CACF;AAED,MAAM,WAAW,SAAS,CAAC,IAAI;IAC7B,UAAU,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,mBAAmB,CAAC;CACrC"}
|
package/dist/server.js
CHANGED
|
@@ -1,19 +1,23 @@
|
|
|
1
1
|
import { waitForSparqlEndpointAvailable } from '@lde/wait-for-sparql';
|
|
2
|
+
const defaultQleverServerOptions = {
|
|
3
|
+
'memory-max-size': '4G',
|
|
4
|
+
'default-query-timeout': '30s',
|
|
5
|
+
};
|
|
2
6
|
export class Server {
|
|
3
7
|
taskRunner;
|
|
4
8
|
indexName;
|
|
5
9
|
task;
|
|
6
10
|
port;
|
|
7
|
-
|
|
8
|
-
constructor({ taskRunner, indexName, port,
|
|
11
|
+
qleverOptions;
|
|
12
|
+
constructor({ taskRunner, indexName, port, qleverOptions }) {
|
|
9
13
|
this.taskRunner = taskRunner;
|
|
10
14
|
this.indexName = indexName;
|
|
11
15
|
this.port = port ?? 7001;
|
|
12
|
-
this.
|
|
16
|
+
this.qleverOptions = { ...defaultQleverServerOptions, ...qleverOptions };
|
|
13
17
|
}
|
|
14
18
|
async start() {
|
|
15
19
|
// TODO prevent double starts.
|
|
16
|
-
this.task = await this.taskRunner.run(`qlever-server --index-basename ${this.indexName} --memory-max-size
|
|
20
|
+
this.task = await this.taskRunner.run(`qlever-server --index-basename ${this.indexName} --memory-max-size ${this.qleverOptions['memory-max-size']} --default-query-timeout ${this.qleverOptions['default-query-timeout']} --port ${this.port}`);
|
|
17
21
|
await waitForSparqlEndpointAvailable(this.queryEndpoint.toString());
|
|
18
22
|
}
|
|
19
23
|
async stop() {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/sparql-qlever",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.14.0",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/sparql-qlever"
|
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@lde/dataset": "0.7.2",
|
|
28
|
-
"@lde/distribution-downloader": "0.
|
|
29
|
-
"@lde/sparql-importer": "0.
|
|
28
|
+
"@lde/distribution-downloader": "0.6.0",
|
|
29
|
+
"@lde/sparql-importer": "0.6.0",
|
|
30
30
|
"@lde/sparql-server": "0.4.10",
|
|
31
31
|
"@lde/task-runner": "0.2.10",
|
|
32
32
|
"@lde/task-runner-docker": "0.2.11",
|