@lde/sparql-qlever 0.10.2 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import { Downloader } from '@lde/distribution-downloader';
2
- import { Importer } from './importer.js';
2
+ import { Importer, QleverIndexOptions } from './importer.js';
3
3
  import { Server } from './server.js';
4
4
  export type QleverOptions = {
5
5
  /** Directory where downloaded data files are stored. */
@@ -8,8 +8,9 @@ export type QleverOptions = {
8
8
  /** @default 7001 */
9
9
  port?: number;
10
10
  downloader?: Downloader;
11
- /** Cache QLever indices and skip re-indexing when source data is unchanged. Defaults to `true`. */
11
+ /** Cache QLever indices and skip re-indexing when source data is unchanged. @default true */
12
12
  cacheIndex?: boolean;
13
+ qleverOptions?: QleverIndexOptions;
13
14
  } & ({
14
15
  mode: 'docker';
15
16
  image: string;
@@ -17,6 +18,12 @@ export type QleverOptions = {
17
18
  } | {
18
19
  mode: 'native';
19
20
  });
21
+ /**
22
+ * Create a paired QLever {@link Importer} and {@link Server} that share a
23
+ * single {@link TaskRunner}. In pipeline setups the importer and server must
24
+ * use the same runner (and therefore the same Docker container or working
25
+ * directory) so that the server can serve the index the importer built.
26
+ */
20
27
  export declare function createQlever(options: QleverOptions): {
21
28
  importer: Importer;
22
29
  server: Server<unknown>;
@@ -1 +1 @@
1
- {"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,MAAM,aAAa,GAAG;IAC1B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,mGAAmG;IACnG,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB,GAAG,CACA;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CACrB,CAAC;AAEF,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa;;;EA0BlD"}
1
+ {"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,MAAM,aAAa,GAAG;IAC1B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,6FAA6F;IAC7F,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC,GAAG,CACA;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CACrB,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa;;;EA2BlD"}
@@ -3,6 +3,12 @@ import { NativeTaskRunner } from '@lde/task-runner-native';
3
3
  import { LastModifiedDownloader, } from '@lde/distribution-downloader';
4
4
  import { Importer } from './importer.js';
5
5
  import { Server } from './server.js';
6
+ /**
7
+ * Create a paired QLever {@link Importer} and {@link Server} that share a
8
+ * single {@link TaskRunner}. In pipeline setups the importer and server must
9
+ * use the same runner (and therefore the same Docker container or working
10
+ * directory) so that the server can serve the index the importer built.
11
+ */
6
12
  export function createQlever(options) {
7
13
  const port = options.port ?? 7001;
8
14
  const taskRunner = options.mode === 'docker'
@@ -19,6 +25,7 @@ export function createQlever(options) {
19
25
  indexName: options.indexName,
20
26
  downloader: options.downloader ?? new LastModifiedDownloader(options.dataDir),
21
27
  cacheIndex: options.cacheIndex,
28
+ qleverOptions: options.qleverOptions,
22
29
  }),
23
30
  server: new Server({
24
31
  taskRunner,
@@ -1,34 +1,27 @@
1
- import { Importer as ImporterInterface, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
1
+ import { Importer as ImporterInterface, ImporterOptions, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
2
2
  import { Distribution } from '@lde/dataset';
3
- import { Downloader } from '@lde/distribution-downloader';
4
- import { TaskRunner } from '@lde/task-runner';
5
- export interface Options {
6
- taskRunner: TaskRunner<unknown>;
3
+ export interface QleverIndexOptions {
4
+ /** @default true */
5
+ 'ascii-prefixes-only'?: boolean;
6
+ /** @default 3_000_000 */
7
+ 'num-triples-per-batch'?: number;
8
+ /** Memory budget for sorting during the index build. @default '10G' */
9
+ 'stxxl-memory'?: string;
10
+ /** @default true */
11
+ 'parse-parallel'?: boolean;
12
+ /** Build only PSO and POS permutations. Faster, but queries with predicate variables won't work. Also disables pattern precomputation. @default false */
13
+ 'only-pso-and-pos-permutations'?: boolean;
14
+ }
15
+ export interface QleverImporterOptions extends ImporterOptions {
16
+ /** @default 'data' */
7
17
  indexName?: string;
8
- downloader?: Downloader;
9
- qleverOptions?: {
10
- 'ascii-prefixes-only': boolean;
11
- 'num-triples-per-batch': number;
12
- };
13
- port?: number;
14
- /** Cache QLever indices and skip re-indexing when source data is unchanged. Defaults to `true`. */
15
- cacheIndex?: boolean;
18
+ qleverOptions?: QleverIndexOptions;
16
19
  }
17
- /**
18
- * Import RDF to a QLever SPARQL server.
19
- *
20
- * - Use the QLever CLI because the Graph Store Protocol is not parallelized.
21
- */
22
20
  export declare class Importer implements ImporterInterface {
23
- private indexName;
24
- private taskRunner;
25
- private downloader;
26
- private qleverOptions;
27
- private cacheIndex;
28
- constructor({ taskRunner, downloader, indexName, qleverOptions, cacheIndex, }: Options);
21
+ private readonly options;
22
+ constructor(options: QleverImporterOptions);
29
23
  import(distributions: Distribution[]): Promise<NotSupported | ImportSuccessful | ImportFailed>;
30
24
  private doImport;
31
- private fileFormatFromMimeType;
32
25
  private parseTripleCount;
33
26
  private cacheInfoPath;
34
27
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AAGtC,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAU9C,MAAM,WAAW,OAAO;IACtB,UAAU,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,aAAa,CAAC,EAAE;QACd,qBAAqB,EAAE,OAAO,CAAC;QAC/B,uBAAuB,EAAE,MAAM,CAAC;KACjC,CAAC;IACF,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mGAAmG;IACnG,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAMD;;;;GAIG;AACH,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,SAAS,CAAC;IAClB,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,UAAU,CAAC;IACnB,OAAO,CAAC,aAAa,CAAC;IACtB,OAAO,CAAC,UAAU,CAAC;gBAEP,EACV,UAAU,EACV,UAAU,EACV,SAAS,EACT,aAAa,EACb,UAAU,GACX,EAAE,OAAO;IAWG,MAAM,CACjB,aAAa,EAAE,YAAY,EAAE,GAC5B,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YA+B5C,QAAQ;IAkCtB,OAAO,CAAC,sBAAsB;IAQ9B,OAAO,CAAC,gBAAgB;IAQxB,OAAO,CAAC,aAAa;IAIrB;;OAEG;YACW,eAAe;IA0B7B,yDAAyD;YAC3C,eAAe;YAaf,cAAc;YAKd,KAAK;CAmBpB"}
1
+ {"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,eAAe,EACf,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAK5C,MAAM,WAAW,kBAAkB;IACjC,oBAAoB;IACpB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,yBAAyB;IACzB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,yJAAyJ;IACzJ,+BAA+B,CAAC,EAAE,OAAO,CAAC;CAC3C;AAED,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC5D,sBAAsB;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACpC;AAWD,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,qBAAqB;IAa7B,MAAM,CACjB,aAAa,EAAE,YAAY,EAAE,GAC5B,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YA+B5C,QAAQ;IA4CtB,OAAO,CAAC,gBAAgB;IAQxB,OAAO,CAAC,aAAa;IAIrB;;OAEG;YACW,eAAe;IA0B7B,yDAAyD;YAC3C,eAAe;YAaf,cAAc;YAKd,KAAK;CA2CpB"}
package/dist/importer.js CHANGED
@@ -1,32 +1,20 @@
1
1
  import { ImportFailed, ImportSuccessful, NotSupported, } from '@lde/sparql-importer';
2
- import { LastModifiedDownloader, } from '@lde/distribution-downloader';
2
+ import { LastModifiedDownloader } from '@lde/distribution-downloader';
3
3
  import { basename, dirname, join } from 'path';
4
4
  import { readFile, stat, writeFile } from 'node:fs/promises';
5
- const supportedFormats = new Map([
6
- ['application/n-triples', 'nt'],
7
- ['application/n-quads', 'nq'],
8
- ['text/turtle', 'ttl'],
9
- ]);
10
- /**
11
- * Import RDF to a QLever SPARQL server.
12
- *
13
- * - Use the QLever CLI because the Graph Store Protocol is not parallelized.
14
- */
15
5
  export class Importer {
16
- indexName;
17
- taskRunner;
18
- downloader;
19
- qleverOptions;
20
- cacheIndex;
21
- constructor({ taskRunner, downloader, indexName, qleverOptions, cacheIndex, }) {
22
- this.taskRunner = taskRunner;
23
- this.downloader = downloader ?? new LastModifiedDownloader();
24
- this.indexName = indexName ?? 'data';
25
- this.qleverOptions = qleverOptions ?? {
26
- 'ascii-prefixes-only': true,
27
- 'num-triples-per-batch': 100000,
6
+ options;
7
+ constructor(options) {
8
+ this.options = {
9
+ ...options,
10
+ indexName: options.indexName ?? 'data',
11
+ downloader: options.downloader ?? new LastModifiedDownloader(),
12
+ cacheIndex: options.cacheIndex ?? true,
13
+ qleverOptions: {
14
+ ...defaultQleverIndexOptions,
15
+ ...options.qleverOptions,
16
+ },
28
17
  };
29
- this.cacheIndex = cacheIndex ?? true;
30
18
  }
31
19
  async import(distributions) {
32
20
  const downloadDistributions = distributions.filter((distribution) => distribution.mimeType !== undefined &&
@@ -56,7 +44,7 @@ export class Importer {
56
44
  return result;
57
45
  }
58
46
  async doImport(distribution) {
59
- const localFile = await this.downloader.download(distribution);
47
+ const localFile = await this.options.downloader.download(distribution);
60
48
  if (await this.isIndexUpToDate(localFile)) {
61
49
  const tripleCount = await this.readTripleCount(localFile);
62
50
  if (tripleCount === 0) {
@@ -64,7 +52,20 @@ export class Importer {
64
52
  }
65
53
  return new ImportSuccessful(distribution, undefined, tripleCount);
66
54
  }
67
- const logs = await this.index(localFile, this.fileFormatFromMimeType(distribution.mimeType));
55
+ const format = fileFormatFromMimeType(distribution.mimeType);
56
+ let logs;
57
+ try {
58
+ logs = await this.index(localFile, format);
59
+ }
60
+ catch (error) {
61
+ if (format === 'ttl' &&
62
+ error.message?.includes('multiline string literal')) {
63
+ logs = await this.index(localFile, format, false);
64
+ }
65
+ else {
66
+ throw error;
67
+ }
68
+ }
68
69
  const tripleCount = this.parseTripleCount(logs);
69
70
  if (tripleCount === 0) {
70
71
  return new ImportFailed(distribution, 'Indexed 0 triples from distribution');
@@ -72,13 +73,6 @@ export class Importer {
72
73
  await this.writeCacheInfo(localFile);
73
74
  return new ImportSuccessful(distribution, undefined, tripleCount);
74
75
  }
75
- fileFormatFromMimeType(mimeType) {
76
- const format = supportedFormats.get(mimeType);
77
- if (format === undefined) {
78
- throw new Error(`Unsupported media type: ${mimeType}`);
79
- }
80
- return format;
81
- }
82
76
  parseTripleCount(logs) {
83
77
  // Extract num-triples.normal from the metadata JSON that the index
84
78
  // command cats to stdout. Use a regex rather than JSON.parse because
@@ -87,13 +81,13 @@ export class Importer {
87
81
  return match ? Number(match[1]) : undefined;
88
82
  }
89
83
  cacheInfoPath(dataFile) {
90
- return join(dirname(dataFile), `${this.indexName}.cache-info.json`);
84
+ return join(dirname(dataFile), `${this.options.indexName}.cache-info.json`);
91
85
  }
92
86
  /**
93
87
  * Check whether the cached index is still up to date.
94
88
  */
95
89
  async isIndexUpToDate(dataFile) {
96
- if (!this.cacheIndex)
90
+ if (!this.options.cacheIndex)
97
91
  return false;
98
92
  let cacheInfo;
99
93
  try {
@@ -118,7 +112,7 @@ export class Importer {
118
112
  /** Read the triple count from QLever's metadata file. */
119
113
  async readTripleCount(dataFile) {
120
114
  try {
121
- const metadataPath = join(dirname(dataFile), `${this.indexName}.meta-data.json`);
115
+ const metadataPath = join(dirname(dataFile), `${this.options.indexName}.meta-data.json`);
122
116
  const raw = await readFile(metadataPath, 'utf-8');
123
117
  return this.parseTripleCount(raw);
124
118
  }
@@ -130,12 +124,50 @@ export class Importer {
130
124
  const info = { sourceFile: basename(dataFile) };
131
125
  await writeFile(this.cacheInfoPath(dataFile), JSON.stringify(info));
132
126
  }
133
- async index(file, format) {
127
+ async index(file, format, parseParallel) {
134
128
  const settingsFile = 'index.settings.json';
135
- await writeFile(`${dirname(file)}/${settingsFile}`, JSON.stringify(this.qleverOptions));
129
+ const settings = {
130
+ 'ascii-prefixes-only': this.options.qleverOptions['ascii-prefixes-only'],
131
+ 'num-triples-per-batch': this.options.qleverOptions['num-triples-per-batch'],
132
+ };
133
+ await writeFile(`${dirname(file)}/${settingsFile}`, JSON.stringify(settings));
136
134
  // TODO: write index to named volume instead of bind mount for better performance.
137
- const metadataFile = `${this.indexName}.meta-data.json`;
138
- const indexTask = await this.taskRunner.run(`(gunzip -c '${basename(file)}' 2>/dev/null || cat '${basename(file)}') | qlever-index -i ${this.indexName} -s ${settingsFile} -F ${format} -p true -f - && cat ${metadataFile}`);
139
- return await this.taskRunner.wait(indexTask);
135
+ const parallel = parseParallel ?? this.options.qleverOptions['parse-parallel'];
136
+ const flags = [
137
+ `-i ${this.options.indexName}`,
138
+ `-s ${settingsFile}`,
139
+ `-F ${format}`,
140
+ `-p true`,
141
+ `-m ${this.options.qleverOptions['stxxl-memory']}`,
142
+ parallel ? '' : '--parse-parallel false',
143
+ this.options.qleverOptions['only-pso-and-pos-permutations']
144
+ ? '-o --no-patterns'
145
+ : '',
146
+ '-f -',
147
+ ]
148
+ .filter(Boolean)
149
+ .join(' ');
150
+ const metadataFile = `${this.options.indexName}.meta-data.json`;
151
+ const indexTask = await this.options.taskRunner.run(`(gunzip -c '${basename(file)}' 2>/dev/null || cat '${basename(file)}') | qlever-index ${flags} && cat ${metadataFile}`);
152
+ return await this.options.taskRunner.wait(indexTask);
153
+ }
154
+ }
155
+ const supportedFormats = new Map([
156
+ ['application/n-triples', 'nt'],
157
+ ['application/n-quads', 'nq'],
158
+ ['text/turtle', 'ttl'],
159
+ ]);
160
+ const defaultQleverIndexOptions = {
161
+ 'ascii-prefixes-only': true,
162
+ 'num-triples-per-batch': 3_000_000,
163
+ 'stxxl-memory': '10G',
164
+ 'parse-parallel': true,
165
+ 'only-pso-and-pos-permutations': false,
166
+ };
167
+ function fileFormatFromMimeType(mimeType) {
168
+ const format = supportedFormats.get(mimeType);
169
+ if (format === undefined) {
170
+ throw new Error(`Unsupported media type: ${mimeType}`);
140
171
  }
172
+ return format;
141
173
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/sparql-qlever",
3
- "version": "0.10.2",
3
+ "version": "0.12.0",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/sparql-qlever"
@@ -25,7 +25,7 @@
25
25
  "dependencies": {
26
26
  "@lde/dataset": "0.7.2",
27
27
  "@lde/distribution-downloader": "0.5.3",
28
- "@lde/sparql-importer": "0.4.2",
28
+ "@lde/sparql-importer": "0.5.0",
29
29
  "@lde/sparql-server": "0.4.10",
30
30
  "@lde/task-runner": "0.2.10",
31
31
  "@lde/task-runner-docker": "0.2.11",