@lde/sparql-qlever 0.11.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/createQlever.d.ts +9 -2
- package/dist/createQlever.d.ts.map +1 -1
- package/dist/createQlever.js +7 -0
- package/dist/importer.d.ts +18 -25
- package/dist/importer.d.ts.map +1 -1
- package/dist/importer.js +59 -41
- package/package.json +3 -2
package/dist/createQlever.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Downloader } from '@lde/distribution-downloader';
|
|
2
|
-
import { Importer } from './importer.js';
|
|
2
|
+
import { Importer, QleverIndexOptions } from './importer.js';
|
|
3
3
|
import { Server } from './server.js';
|
|
4
4
|
export type QleverOptions = {
|
|
5
5
|
/** Directory where downloaded data files are stored. */
|
|
@@ -8,8 +8,9 @@ export type QleverOptions = {
|
|
|
8
8
|
/** @default 7001 */
|
|
9
9
|
port?: number;
|
|
10
10
|
downloader?: Downloader;
|
|
11
|
-
/** Cache QLever indices and skip re-indexing when source data is unchanged.
|
|
11
|
+
/** Cache QLever indices and skip re-indexing when source data is unchanged. @default true */
|
|
12
12
|
cacheIndex?: boolean;
|
|
13
|
+
qleverOptions?: QleverIndexOptions;
|
|
13
14
|
} & ({
|
|
14
15
|
mode: 'docker';
|
|
15
16
|
image: string;
|
|
@@ -17,6 +18,12 @@ export type QleverOptions = {
|
|
|
17
18
|
} | {
|
|
18
19
|
mode: 'native';
|
|
19
20
|
});
|
|
21
|
+
/**
|
|
22
|
+
* Create a paired QLever {@link Importer} and {@link Server} that share a
|
|
23
|
+
* single {@link TaskRunner}. In pipeline setups the importer and server must
|
|
24
|
+
* use the same runner (and therefore the same Docker container or working
|
|
25
|
+
* directory) so that the server can serve the index the importer built.
|
|
26
|
+
*/
|
|
20
27
|
export declare function createQlever(options: QleverOptions): {
|
|
21
28
|
importer: Importer;
|
|
22
29
|
server: Server<unknown>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,MAAM,aAAa,GAAG;IAC1B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,6FAA6F;IAC7F,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC,GAAG,CACA;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CACrB,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa;;;EA2BlD"}
|
package/dist/createQlever.js
CHANGED
|
@@ -3,6 +3,12 @@ import { NativeTaskRunner } from '@lde/task-runner-native';
|
|
|
3
3
|
import { LastModifiedDownloader, } from '@lde/distribution-downloader';
|
|
4
4
|
import { Importer } from './importer.js';
|
|
5
5
|
import { Server } from './server.js';
|
|
6
|
+
/**
|
|
7
|
+
* Create a paired QLever {@link Importer} and {@link Server} that share a
|
|
8
|
+
* single {@link TaskRunner}. In pipeline setups the importer and server must
|
|
9
|
+
* use the same runner (and therefore the same Docker container or working
|
|
10
|
+
* directory) so that the server can serve the index the importer built.
|
|
11
|
+
*/
|
|
6
12
|
export function createQlever(options) {
|
|
7
13
|
const port = options.port ?? 7001;
|
|
8
14
|
const taskRunner = options.mode === 'docker'
|
|
@@ -19,6 +25,7 @@ export function createQlever(options) {
|
|
|
19
25
|
indexName: options.indexName,
|
|
20
26
|
downloader: options.downloader ?? new LastModifiedDownloader(options.dataDir),
|
|
21
27
|
cacheIndex: options.cacheIndex,
|
|
28
|
+
qleverOptions: options.qleverOptions,
|
|
22
29
|
}),
|
|
23
30
|
server: new Server({
|
|
24
31
|
taskRunner,
|
package/dist/importer.d.ts
CHANGED
|
@@ -1,34 +1,27 @@
|
|
|
1
|
-
import { Importer as ImporterInterface, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
|
|
1
|
+
import { Importer as ImporterInterface, ImporterOptions, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
|
|
2
2
|
import { Distribution } from '@lde/dataset';
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
export interface QleverIndexOptions {
|
|
4
|
+
/** @default true */
|
|
5
|
+
'ascii-prefixes-only'?: boolean;
|
|
6
|
+
/** @default 3_000_000 */
|
|
7
|
+
'num-triples-per-batch'?: number;
|
|
8
|
+
/** Memory budget for sorting during the index build. @default '10G' */
|
|
9
|
+
'stxxl-memory'?: string;
|
|
10
|
+
/** @default true */
|
|
11
|
+
'parse-parallel'?: boolean;
|
|
12
|
+
/** Build only PSO and POS permutations. Faster, but queries with predicate variables won't work. Also disables pattern precomputation. @default false */
|
|
13
|
+
'only-pso-and-pos-permutations'?: boolean;
|
|
14
|
+
}
|
|
15
|
+
export interface QleverImporterOptions extends ImporterOptions {
|
|
16
|
+
/** @default 'data' */
|
|
7
17
|
indexName?: string;
|
|
8
|
-
|
|
9
|
-
qleverOptions?: {
|
|
10
|
-
'ascii-prefixes-only': boolean;
|
|
11
|
-
'num-triples-per-batch': number;
|
|
12
|
-
};
|
|
13
|
-
port?: number;
|
|
14
|
-
/** Cache QLever indices and skip re-indexing when source data is unchanged. Defaults to `true`. */
|
|
15
|
-
cacheIndex?: boolean;
|
|
18
|
+
qleverOptions?: QleverIndexOptions;
|
|
16
19
|
}
|
|
17
|
-
/**
|
|
18
|
-
* Import RDF to a QLever SPARQL server.
|
|
19
|
-
*
|
|
20
|
-
* - Use the QLever CLI because the Graph Store Protocol is not parallelized.
|
|
21
|
-
*/
|
|
22
20
|
export declare class Importer implements ImporterInterface {
|
|
23
|
-
private
|
|
24
|
-
|
|
25
|
-
private downloader;
|
|
26
|
-
private qleverOptions;
|
|
27
|
-
private cacheIndex;
|
|
28
|
-
constructor({ taskRunner, downloader, indexName, qleverOptions, cacheIndex, }: Options);
|
|
21
|
+
private readonly options;
|
|
22
|
+
constructor(options: QleverImporterOptions);
|
|
29
23
|
import(distributions: Distribution[]): Promise<NotSupported | ImportSuccessful | ImportFailed>;
|
|
30
24
|
private doImport;
|
|
31
|
-
private fileFormatFromMimeType;
|
|
32
25
|
private parseTripleCount;
|
|
33
26
|
private cacheInfoPath;
|
|
34
27
|
/**
|
package/dist/importer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,eAAe,EACf,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAK5C,MAAM,WAAW,kBAAkB;IACjC,oBAAoB;IACpB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,yBAAyB;IACzB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,yJAAyJ;IACzJ,+BAA+B,CAAC,EAAE,OAAO,CAAC;CAC3C;AAED,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC5D,sBAAsB;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC;AAWD,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,qBAAqB;IAa7B,MAAM,CACjB,aAAa,EAAE,YAAY,EAAE,GAC5B,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YA+B5C,QAAQ;IA4CtB,OAAO,CAAC,gBAAgB;IAQxB,OAAO,CAAC,aAAa;IAIrB;;OAEG;YACW,eAAe;IA0B7B,yDAAyD;YAC3C,eAAe;YAaf,cAAc;YAKd,KAAK;CA0CpB"}
|
package/dist/importer.js
CHANGED
|
@@ -1,32 +1,20 @@
|
|
|
1
1
|
import { ImportFailed, ImportSuccessful, NotSupported, } from '@lde/sparql-importer';
|
|
2
|
-
import { LastModifiedDownloader
|
|
2
|
+
import { LastModifiedDownloader } from '@lde/distribution-downloader';
|
|
3
3
|
import { basename, dirname, join } from 'path';
|
|
4
4
|
import { readFile, stat, writeFile } from 'node:fs/promises';
|
|
5
|
-
const supportedFormats = new Map([
|
|
6
|
-
['application/n-triples', 'nt'],
|
|
7
|
-
['application/n-quads', 'nq'],
|
|
8
|
-
['text/turtle', 'ttl'],
|
|
9
|
-
]);
|
|
10
|
-
/**
|
|
11
|
-
* Import RDF to a QLever SPARQL server.
|
|
12
|
-
*
|
|
13
|
-
* - Use the QLever CLI because the Graph Store Protocol is not parallelized.
|
|
14
|
-
*/
|
|
15
5
|
export class Importer {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
'num-triples-per-batch': 100000,
|
|
6
|
+
options;
|
|
7
|
+
constructor(options) {
|
|
8
|
+
this.options = {
|
|
9
|
+
...options,
|
|
10
|
+
indexName: options.indexName ?? 'data',
|
|
11
|
+
downloader: options.downloader ?? new LastModifiedDownloader(),
|
|
12
|
+
cacheIndex: options.cacheIndex ?? true,
|
|
13
|
+
qleverOptions: {
|
|
14
|
+
...defaultQleverIndexOptions,
|
|
15
|
+
...options.qleverOptions,
|
|
16
|
+
},
|
|
28
17
|
};
|
|
29
|
-
this.cacheIndex = cacheIndex ?? true;
|
|
30
18
|
}
|
|
31
19
|
async import(distributions) {
|
|
32
20
|
const downloadDistributions = distributions.filter((distribution) => distribution.mimeType !== undefined &&
|
|
@@ -56,7 +44,7 @@ export class Importer {
|
|
|
56
44
|
return result;
|
|
57
45
|
}
|
|
58
46
|
async doImport(distribution) {
|
|
59
|
-
const localFile = await this.downloader.download(distribution);
|
|
47
|
+
const localFile = await this.options.downloader.download(distribution);
|
|
60
48
|
if (await this.isIndexUpToDate(localFile)) {
|
|
61
49
|
const tripleCount = await this.readTripleCount(localFile);
|
|
62
50
|
if (tripleCount === 0) {
|
|
@@ -64,7 +52,7 @@ export class Importer {
|
|
|
64
52
|
}
|
|
65
53
|
return new ImportSuccessful(distribution, undefined, tripleCount);
|
|
66
54
|
}
|
|
67
|
-
const format = this.fileFormatFromMimeType(distribution.mimeType);
|
|
55
|
+
const format = fileFormatFromMimeType(distribution.mimeType);
|
|
68
56
|
let logs;
|
|
69
57
|
try {
|
|
70
58
|
logs = await this.index(localFile, format);
|
|
@@ -85,13 +73,6 @@ export class Importer {
|
|
|
85
73
|
await this.writeCacheInfo(localFile);
|
|
86
74
|
return new ImportSuccessful(distribution, undefined, tripleCount);
|
|
87
75
|
}
|
|
88
|
-
fileFormatFromMimeType(mimeType) {
|
|
89
|
-
const format = supportedFormats.get(mimeType);
|
|
90
|
-
if (format === undefined) {
|
|
91
|
-
throw new Error(`Unsupported media type: ${mimeType}`);
|
|
92
|
-
}
|
|
93
|
-
return format;
|
|
94
|
-
}
|
|
95
76
|
parseTripleCount(logs) {
|
|
96
77
|
// Extract num-triples.normal from the metadata JSON that the index
|
|
97
78
|
// command cats to stdout. Use a regex rather than JSON.parse because
|
|
@@ -100,13 +81,13 @@ export class Importer {
|
|
|
100
81
|
return match ? Number(match[1]) : undefined;
|
|
101
82
|
}
|
|
102
83
|
cacheInfoPath(dataFile) {
|
|
103
|
-
return join(dirname(dataFile), `${this.indexName}.cache-info.json`);
|
|
84
|
+
return join(dirname(dataFile), `${this.options.indexName}.cache-info.json`);
|
|
104
85
|
}
|
|
105
86
|
/**
|
|
106
87
|
* Check whether the cached index is still up to date.
|
|
107
88
|
*/
|
|
108
89
|
async isIndexUpToDate(dataFile) {
|
|
109
|
-
if (!this.cacheIndex)
|
|
90
|
+
if (!this.options.cacheIndex)
|
|
110
91
|
return false;
|
|
111
92
|
let cacheInfo;
|
|
112
93
|
try {
|
|
@@ -131,7 +112,7 @@ export class Importer {
|
|
|
131
112
|
/** Read the triple count from QLever's metadata file. */
|
|
132
113
|
async readTripleCount(dataFile) {
|
|
133
114
|
try {
|
|
134
|
-
const metadataPath = join(dirname(dataFile), `${this.indexName}.meta-data.json`);
|
|
115
|
+
const metadataPath = join(dirname(dataFile), `${this.options.indexName}.meta-data.json`);
|
|
135
116
|
const raw = await readFile(metadataPath, 'utf-8');
|
|
136
117
|
return this.parseTripleCount(raw);
|
|
137
118
|
}
|
|
@@ -143,12 +124,49 @@ export class Importer {
|
|
|
143
124
|
const info = { sourceFile: basename(dataFile) };
|
|
144
125
|
await writeFile(this.cacheInfoPath(dataFile), JSON.stringify(info));
|
|
145
126
|
}
|
|
146
|
-
async index(file, format, parseParallel
|
|
127
|
+
async index(file, format, parseParallel) {
|
|
147
128
|
const settingsFile = 'index.settings.json';
|
|
148
|
-
|
|
129
|
+
const settings = {
|
|
130
|
+
'ascii-prefixes-only': this.options.qleverOptions['ascii-prefixes-only'],
|
|
131
|
+
'num-triples-per-batch': this.options.qleverOptions['num-triples-per-batch'],
|
|
132
|
+
};
|
|
133
|
+
await writeFile(`${dirname(file)}/${settingsFile}`, JSON.stringify(settings));
|
|
149
134
|
// TODO: write index to named volume instead of bind mount for better performance.
|
|
150
|
-
const
|
|
151
|
-
const
|
|
152
|
-
|
|
135
|
+
const parallel = parseParallel ?? this.options.qleverOptions['parse-parallel'];
|
|
136
|
+
const flags = [
|
|
137
|
+
`-i ${this.options.indexName}`,
|
|
138
|
+
`-s ${settingsFile}`,
|
|
139
|
+
`-F ${format}`,
|
|
140
|
+
`--parse-parallel ${parallel}`,
|
|
141
|
+
`-m ${this.options.qleverOptions['stxxl-memory']}`,
|
|
142
|
+
this.options.qleverOptions['only-pso-and-pos-permutations']
|
|
143
|
+
? '-o --no-patterns'
|
|
144
|
+
: '',
|
|
145
|
+
'-f -',
|
|
146
|
+
]
|
|
147
|
+
.filter(Boolean)
|
|
148
|
+
.join(' ');
|
|
149
|
+
const metadataFile = `${this.options.indexName}.meta-data.json`;
|
|
150
|
+
const indexTask = await this.options.taskRunner.run(`(gunzip -c '${basename(file)}' 2>/dev/null || cat '${basename(file)}') | qlever-index ${flags} && cat ${metadataFile}`);
|
|
151
|
+
return await this.options.taskRunner.wait(indexTask);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
const supportedFormats = new Map([
|
|
155
|
+
['application/n-triples', 'nt'],
|
|
156
|
+
['application/n-quads', 'nq'],
|
|
157
|
+
['text/turtle', 'ttl'],
|
|
158
|
+
]);
|
|
159
|
+
const defaultQleverIndexOptions = {
|
|
160
|
+
'ascii-prefixes-only': true,
|
|
161
|
+
'num-triples-per-batch': 3_000_000,
|
|
162
|
+
'stxxl-memory': '10G',
|
|
163
|
+
'parse-parallel': true,
|
|
164
|
+
'only-pso-and-pos-permutations': false,
|
|
165
|
+
};
|
|
166
|
+
function fileFormatFromMimeType(mimeType) {
|
|
167
|
+
const format = supportedFormats.get(mimeType);
|
|
168
|
+
if (format === undefined) {
|
|
169
|
+
throw new Error(`Unsupported media type: ${mimeType}`);
|
|
153
170
|
}
|
|
171
|
+
return format;
|
|
154
172
|
}
|
package/package.json
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/sparql-qlever",
|
|
3
|
-
"version": "0.11.0",
|
|
3
|
+
"version": "0.12.1",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/sparql-qlever"
|
|
7
7
|
},
|
|
8
|
+
"license": "MIT",
|
|
8
9
|
"type": "module",
|
|
9
10
|
"exports": {
|
|
10
11
|
"./package.json": "./package.json",
|
|
@@ -25,7 +26,7 @@
|
|
|
25
26
|
"dependencies": {
|
|
26
27
|
"@lde/dataset": "0.7.2",
|
|
27
28
|
"@lde/distribution-downloader": "0.5.3",
|
|
28
|
-
"@lde/sparql-importer": "0.
|
|
29
|
+
"@lde/sparql-importer": "0.5.0",
|
|
29
30
|
"@lde/sparql-server": "0.4.10",
|
|
30
31
|
"@lde/task-runner": "0.2.10",
|
|
31
32
|
"@lde/task-runner-docker": "0.2.11",
|