@lde/sparql-qlever 0.11.0 → 0.12.1

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import { Downloader } from '@lde/distribution-downloader';
2
- import { Importer } from './importer.js';
2
+ import { Importer, QleverIndexOptions } from './importer.js';
3
3
  import { Server } from './server.js';
4
4
  export type QleverOptions = {
5
5
  /** Directory where downloaded data files are stored. */
@@ -8,8 +8,9 @@ export type QleverOptions = {
8
8
  /** @default 7001 */
9
9
  port?: number;
10
10
  downloader?: Downloader;
11
- /** Cache QLever indices and skip re-indexing when source data is unchanged. Defaults to `true`. */
11
+ /** Cache QLever indices and skip re-indexing when source data is unchanged. @default true */
12
12
  cacheIndex?: boolean;
13
+ qleverOptions?: QleverIndexOptions;
13
14
  } & ({
14
15
  mode: 'docker';
15
16
  image: string;
@@ -17,6 +18,12 @@ export type QleverOptions = {
17
18
  } | {
18
19
  mode: 'native';
19
20
  });
21
+ /**
22
+ * Create a paired QLever {@link Importer} and {@link Server} that share a
23
+ * single {@link TaskRunner}. In pipeline setups the importer and server must
24
+ * use the same runner (and therefore the same Docker container or working
25
+ * directory) so that the server can serve the index the importer built.
26
+ */
20
27
  export declare function createQlever(options: QleverOptions): {
21
28
  importer: Importer;
22
29
  server: Server<unknown>;
@@ -1 +1 @@
1
- {"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,MAAM,aAAa,GAAG;IAC1B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,mGAAmG;IACnG,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB,GAAG,CACA;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CACrB,CAAC;AAEF,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa;;;EA0BlD"}
1
+ {"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,MAAM,aAAa,GAAG;IAC1B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,6FAA6F;IAC7F,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC,GAAG,CACA;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CACrB,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa;;;EA2BlD"}
@@ -3,6 +3,12 @@ import { NativeTaskRunner } from '@lde/task-runner-native';
3
3
  import { LastModifiedDownloader, } from '@lde/distribution-downloader';
4
4
  import { Importer } from './importer.js';
5
5
  import { Server } from './server.js';
6
+ /**
7
+ * Create a paired QLever {@link Importer} and {@link Server} that share a
8
+ * single {@link TaskRunner}. In pipeline setups the importer and server must
9
+ * use the same runner (and therefore the same Docker container or working
10
+ * directory) so that the server can serve the index the importer built.
11
+ */
6
12
  export function createQlever(options) {
7
13
  const port = options.port ?? 7001;
8
14
  const taskRunner = options.mode === 'docker'
@@ -19,6 +25,7 @@ export function createQlever(options) {
19
25
  indexName: options.indexName,
20
26
  downloader: options.downloader ?? new LastModifiedDownloader(options.dataDir),
21
27
  cacheIndex: options.cacheIndex,
28
+ qleverOptions: options.qleverOptions,
22
29
  }),
23
30
  server: new Server({
24
31
  taskRunner,
@@ -1,34 +1,27 @@
1
- import { Importer as ImporterInterface, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
1
+ import { Importer as ImporterInterface, ImporterOptions, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
2
2
  import { Distribution } from '@lde/dataset';
3
- import { Downloader } from '@lde/distribution-downloader';
4
- import { TaskRunner } from '@lde/task-runner';
5
- export interface Options {
6
- taskRunner: TaskRunner<unknown>;
3
+ export interface QleverIndexOptions {
4
+ /** @default true */
5
+ 'ascii-prefixes-only'?: boolean;
6
+ /** @default 3_000_000 */
7
+ 'num-triples-per-batch'?: number;
8
+ /** Memory budget for sorting during the index build. @default '10G' */
9
+ 'stxxl-memory'?: string;
10
+ /** @default true */
11
+ 'parse-parallel'?: boolean;
12
+ /** Build only PSO and POS permutations. Faster, but queries with predicate variables won't work. Also disables pattern precomputation. @default false */
13
+ 'only-pso-and-pos-permutations'?: boolean;
14
+ }
15
+ export interface QleverImporterOptions extends ImporterOptions {
16
+ /** @default 'data' */
7
17
  indexName?: string;
8
- downloader?: Downloader;
9
- qleverOptions?: {
10
- 'ascii-prefixes-only': boolean;
11
- 'num-triples-per-batch': number;
12
- };
13
- port?: number;
14
- /** Cache QLever indices and skip re-indexing when source data is unchanged. Defaults to `true`. */
15
- cacheIndex?: boolean;
18
+ qleverOptions?: QleverIndexOptions;
16
19
  }
17
- /**
18
- * Import RDF to a QLever SPARQL server.
19
- *
20
- * - Use the QLever CLI because the Graph Store Protocol is not parallelized.
21
- */
22
20
  export declare class Importer implements ImporterInterface {
23
- private indexName;
24
- private taskRunner;
25
- private downloader;
26
- private qleverOptions;
27
- private cacheIndex;
28
- constructor({ taskRunner, downloader, indexName, qleverOptions, cacheIndex, }: Options);
21
+ private readonly options;
22
+ constructor(options: QleverImporterOptions);
29
23
  import(distributions: Distribution[]): Promise<NotSupported | ImportSuccessful | ImportFailed>;
30
24
  private doImport;
31
- private fileFormatFromMimeType;
32
25
  private parseTripleCount;
33
26
  private cacheInfoPath;
34
27
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AAGtC,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAU9C,MAAM,WAAW,OAAO;IACtB,UAAU,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,aAAa,CAAC,EAAE;QACd,qBAAqB,EAAE,OAAO,CAAC;QAC/B,uBAAuB,EAAE,MAAM,CAAC;KACjC,CAAC;IACF,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mGAAmG;IACnG,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAMD;;;;GAIG;AACH,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,SAAS,CAAC;IAClB,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,UAAU,CAAC;IACnB,OAAO,CAAC,aAAa,CAAC;IACtB,OAAO,CAAC,UAAU,CAAC;gBAEP,EACV,UAAU,EACV,UAAU,EACV,SAAS,EACT,aAAa,EACb,UAAU,GACX,EAAE,OAAO;IAWG,MAAM,CACjB,aAAa,EAAE,YAAY,EAAE,GAC5B,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YA+B5C,QAAQ;IA4CtB,OAAO,CAAC,sBAAsB;IAQ9B,OAAO,CAAC,gBAAgB;IAQxB,OAAO,CAAC,aAAa;IAIrB;;OAEG;YACW,eAAe;IA0B7B,yDAAyD;YAC3C,eAAe;YAaf,cAAc;YAKd,KAAK;CAyBpB"}
1
+ {"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,eAAe,EACf,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAK5C,MAAM,WAAW,kBAAkB;IACjC,oBAAoB;IACpB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,yBAAyB;IACzB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,yJAAyJ;IACzJ,+BAA+B,CAAC,EAAE,OAAO,CAAC;CAC3C;AAED,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC5D,sBAAsB;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC;AAWD,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,qBAAqB;IAa7B,MAAM,CACjB,aAAa,EAAE,YAAY,EAAE,GAC5B,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YA+B5C,QAAQ;IA4CtB,OAAO,CAAC,gBAAgB;IAQxB,OAAO,CAAC,aAAa;IAIrB;;OAEG;YACW,eAAe;IA0B7B,yDAAyD;YAC3C,eAAe;YAaf,cAAc;YAKd,KAAK;CA0CpB"}
package/dist/importer.js CHANGED
@@ -1,32 +1,20 @@
1
1
  import { ImportFailed, ImportSuccessful, NotSupported, } from '@lde/sparql-importer';
2
- import { LastModifiedDownloader, } from '@lde/distribution-downloader';
2
+ import { LastModifiedDownloader } from '@lde/distribution-downloader';
3
3
  import { basename, dirname, join } from 'path';
4
4
  import { readFile, stat, writeFile } from 'node:fs/promises';
5
- const supportedFormats = new Map([
6
- ['application/n-triples', 'nt'],
7
- ['application/n-quads', 'nq'],
8
- ['text/turtle', 'ttl'],
9
- ]);
10
- /**
11
- * Import RDF to a QLever SPARQL server.
12
- *
13
- * - Use the QLever CLI because the Graph Store Protocol is not parallelized.
14
- */
15
5
  export class Importer {
16
- indexName;
17
- taskRunner;
18
- downloader;
19
- qleverOptions;
20
- cacheIndex;
21
- constructor({ taskRunner, downloader, indexName, qleverOptions, cacheIndex, }) {
22
- this.taskRunner = taskRunner;
23
- this.downloader = downloader ?? new LastModifiedDownloader();
24
- this.indexName = indexName ?? 'data';
25
- this.qleverOptions = qleverOptions ?? {
26
- 'ascii-prefixes-only': true,
27
- 'num-triples-per-batch': 100000,
6
+ options;
7
+ constructor(options) {
8
+ this.options = {
9
+ ...options,
10
+ indexName: options.indexName ?? 'data',
11
+ downloader: options.downloader ?? new LastModifiedDownloader(),
12
+ cacheIndex: options.cacheIndex ?? true,
13
+ qleverOptions: {
14
+ ...defaultQleverIndexOptions,
15
+ ...options.qleverOptions,
16
+ },
28
17
  };
29
- this.cacheIndex = cacheIndex ?? true;
30
18
  }
31
19
  async import(distributions) {
32
20
  const downloadDistributions = distributions.filter((distribution) => distribution.mimeType !== undefined &&
@@ -56,7 +44,7 @@ export class Importer {
56
44
  return result;
57
45
  }
58
46
  async doImport(distribution) {
59
- const localFile = await this.downloader.download(distribution);
47
+ const localFile = await this.options.downloader.download(distribution);
60
48
  if (await this.isIndexUpToDate(localFile)) {
61
49
  const tripleCount = await this.readTripleCount(localFile);
62
50
  if (tripleCount === 0) {
@@ -64,7 +52,7 @@ export class Importer {
64
52
  }
65
53
  return new ImportSuccessful(distribution, undefined, tripleCount);
66
54
  }
67
- const format = this.fileFormatFromMimeType(distribution.mimeType);
55
+ const format = fileFormatFromMimeType(distribution.mimeType);
68
56
  let logs;
69
57
  try {
70
58
  logs = await this.index(localFile, format);
@@ -85,13 +73,6 @@ export class Importer {
85
73
  await this.writeCacheInfo(localFile);
86
74
  return new ImportSuccessful(distribution, undefined, tripleCount);
87
75
  }
88
- fileFormatFromMimeType(mimeType) {
89
- const format = supportedFormats.get(mimeType);
90
- if (format === undefined) {
91
- throw new Error(`Unsupported media type: ${mimeType}`);
92
- }
93
- return format;
94
- }
95
76
  parseTripleCount(logs) {
96
77
  // Extract num-triples.normal from the metadata JSON that the index
97
78
  // command cats to stdout. Use a regex rather than JSON.parse because
@@ -100,13 +81,13 @@ export class Importer {
100
81
  return match ? Number(match[1]) : undefined;
101
82
  }
102
83
  cacheInfoPath(dataFile) {
103
- return join(dirname(dataFile), `${this.indexName}.cache-info.json`);
84
+ return join(dirname(dataFile), `${this.options.indexName}.cache-info.json`);
104
85
  }
105
86
  /**
106
87
  * Check whether the cached index is still up to date.
107
88
  */
108
89
  async isIndexUpToDate(dataFile) {
109
- if (!this.cacheIndex)
90
+ if (!this.options.cacheIndex)
110
91
  return false;
111
92
  let cacheInfo;
112
93
  try {
@@ -131,7 +112,7 @@ export class Importer {
131
112
  /** Read the triple count from QLever's metadata file. */
132
113
  async readTripleCount(dataFile) {
133
114
  try {
134
- const metadataPath = join(dirname(dataFile), `${this.indexName}.meta-data.json`);
115
+ const metadataPath = join(dirname(dataFile), `${this.options.indexName}.meta-data.json`);
135
116
  const raw = await readFile(metadataPath, 'utf-8');
136
117
  return this.parseTripleCount(raw);
137
118
  }
@@ -143,12 +124,49 @@ export class Importer {
143
124
  const info = { sourceFile: basename(dataFile) };
144
125
  await writeFile(this.cacheInfoPath(dataFile), JSON.stringify(info));
145
126
  }
146
- async index(file, format, parseParallel = true) {
127
+ async index(file, format, parseParallel) {
147
128
  const settingsFile = 'index.settings.json';
148
- await writeFile(`${dirname(file)}/${settingsFile}`, JSON.stringify(this.qleverOptions));
129
+ const settings = {
130
+ 'ascii-prefixes-only': this.options.qleverOptions['ascii-prefixes-only'],
131
+ 'num-triples-per-batch': this.options.qleverOptions['num-triples-per-batch'],
132
+ };
133
+ await writeFile(`${dirname(file)}/${settingsFile}`, JSON.stringify(settings));
149
134
  // TODO: write index to named volume instead of bind mount for better performance.
150
- const metadataFile = `${this.indexName}.meta-data.json`;
151
- const indexTask = await this.taskRunner.run(`(gunzip -c '${basename(file)}' 2>/dev/null || cat '${basename(file)}') | qlever-index -i ${this.indexName} -s ${settingsFile} -F ${format} -p true${parseParallel ? '' : ' --parse-parallel false'} -f - && cat ${metadataFile}`);
152
- return await this.taskRunner.wait(indexTask);
135
+ const parallel = parseParallel ?? this.options.qleverOptions['parse-parallel'];
136
+ const flags = [
137
+ `-i ${this.options.indexName}`,
138
+ `-s ${settingsFile}`,
139
+ `-F ${format}`,
140
+ `--parse-parallel ${parallel}`,
141
+ `-m ${this.options.qleverOptions['stxxl-memory']}`,
142
+ this.options.qleverOptions['only-pso-and-pos-permutations']
143
+ ? '-o --no-patterns'
144
+ : '',
145
+ '-f -',
146
+ ]
147
+ .filter(Boolean)
148
+ .join(' ');
149
+ const metadataFile = `${this.options.indexName}.meta-data.json`;
150
+ const indexTask = await this.options.taskRunner.run(`(gunzip -c '${basename(file)}' 2>/dev/null || cat '${basename(file)}') | qlever-index ${flags} && cat ${metadataFile}`);
151
+ return await this.options.taskRunner.wait(indexTask);
152
+ }
153
+ }
154
+ const supportedFormats = new Map([
155
+ ['application/n-triples', 'nt'],
156
+ ['application/n-quads', 'nq'],
157
+ ['text/turtle', 'ttl'],
158
+ ]);
159
+ const defaultQleverIndexOptions = {
160
+ 'ascii-prefixes-only': true,
161
+ 'num-triples-per-batch': 3_000_000,
162
+ 'stxxl-memory': '10G',
163
+ 'parse-parallel': true,
164
+ 'only-pso-and-pos-permutations': false,
165
+ };
166
+ function fileFormatFromMimeType(mimeType) {
167
+ const format = supportedFormats.get(mimeType);
168
+ if (format === undefined) {
169
+ throw new Error(`Unsupported media type: ${mimeType}`);
153
170
  }
171
+ return format;
154
172
  }
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "@lde/sparql-qlever",
3
- "version": "0.11.0",
3
+ "version": "0.12.1",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/sparql-qlever"
7
7
  },
8
+ "license": "MIT",
8
9
  "type": "module",
9
10
  "exports": {
10
11
  "./package.json": "./package.json",
@@ -25,7 +26,7 @@
25
26
  "dependencies": {
26
27
  "@lde/dataset": "0.7.2",
27
28
  "@lde/distribution-downloader": "0.5.3",
28
- "@lde/sparql-importer": "0.4.2",
29
+ "@lde/sparql-importer": "0.5.0",
29
30
  "@lde/sparql-server": "0.4.10",
30
31
  "@lde/task-runner": "0.2.10",
31
32
  "@lde/task-runner-docker": "0.2.11",