@lde/sparql-qlever 0.9.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,3 +1,11 @@
1
1
  # SPARQL QLever
2
2
 
3
3
  An adapter for the [QLever](https://github.com/ad-freiburg/qlever) SPARQL server.
4
+
5
+ ## Index caching
6
+
7
+ Building a QLever index is slow. To avoid rebuilding it on every pipeline run, the importer caches a single index and reuses it when the source data hasn't changed. On subsequent runs, indexing is skipped when the downloaded source file has the same name as the file that was last indexed and has not been modified (for example, re-downloaded) since that index was built.
8
+
9
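+ Only **one** index is cached at a time. In a multi-dataset pipeline, each dataset overwrites the previous index. On re-run, only the dataset that was indexed last can get a cache hit, and only if it is processed before another dataset overwrites the cached index; all other datasets rebuild.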
10
+
11
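+ Caching is enabled by default. Disable it by passing `cacheIndex: false` to `createQlever()` or the `Importer` constructor. The option is a plain boolean: if you want to control it through an environment variable such as `QLEVER_CACHE_INDEX=false`, read that variable in your own code and pass the result as `cacheIndex`.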
@@ -8,6 +8,8 @@ export type QleverOptions = {
8
8
  /** @default 7001 */
9
9
  port?: number;
10
10
  downloader?: Downloader;
11
+ /** Cache QLever indices and skip re-indexing when source data is unchanged. Defaults to `true`. */
12
+ cacheIndex?: boolean;
11
13
  } & ({
12
14
  mode: 'docker';
13
15
  image: string;
@@ -1 +1 @@
1
- {"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,MAAM,aAAa,GAAG;IAC1B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;CACzB,GAAG,CACA;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CACrB,CAAC;AAEF,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa;;;EAyBlD"}
1
+ {"version":3,"file":"createQlever.d.ts","sourceRoot":"","sources":["../src/createQlever.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,MAAM,aAAa,GAAG;IAC1B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,mGAAmG;IACnG,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB,GAAG,CACA;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CACrB,CAAC;AAEF,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa;;;EA0BlD"}
@@ -18,6 +18,7 @@ export function createQlever(options) {
18
18
  taskRunner,
19
19
  indexName: options.indexName,
20
20
  downloader: options.downloader ?? new LastModifiedDownloader(options.dataDir),
21
+ cacheIndex: options.cacheIndex,
21
22
  }),
22
23
  server: new Server({
23
24
  taskRunner,
@@ -11,6 +11,8 @@ export interface Options {
11
11
  'num-triples-per-batch': number;
12
12
  };
13
13
  port?: number;
14
+ /** Cache QLever indices and skip re-indexing when source data is unchanged. Defaults to `true`. */
15
+ cacheIndex?: boolean;
14
16
  }
15
17
  /**
16
18
  * Import RDF to a QLever SPARQL server.
@@ -22,11 +24,20 @@ export declare class Importer implements ImporterInterface {
22
24
  private taskRunner;
23
25
  private downloader;
24
26
  private qleverOptions;
25
- constructor({ taskRunner, downloader, indexName, qleverOptions }: Options);
27
+ private cacheIndex;
28
+ constructor({ taskRunner, downloader, indexName, qleverOptions, cacheIndex, }: Options);
26
29
  import(dataset: Dataset): Promise<NotSupported | ImportSuccessful | ImportFailed>;
27
30
  private doImport;
28
31
  private fileFormatFromMimeType;
29
32
  private parseTripleCount;
33
+ private cacheInfoPath;
34
+ /**
35
+ * Check whether the cached index is still up to date.
36
+ */
37
+ private isIndexUpToDate;
38
+ /** Read the triple count from QLever's metadata file. */
39
+ private readTripleCount;
40
+ private writeCacheInfo;
30
41
  private index;
31
42
  }
32
43
  //# sourceMappingURL=importer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AAGtC,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAU9C,MAAM,WAAW,OAAO;IACtB,UAAU,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,aAAa,CAAC,EAAE;QACd,qBAAqB,EAAE,OAAO,CAAC;QAC/B,uBAAuB,EAAE,MAAM,CAAC;KACjC,CAAC;IACF,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;;;GAIG;AACH,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,SAAS,CAAC;IAClB,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,UAAU,CAAC;IACnB,OAAO,CAAC,aAAa,CAAC;gBAEV,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS,EAAE,aAAa,EAAE,EAAE,OAAO;IAU5D,MAAM,CACjB,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YAiC5C,QAAQ;IAatB,OAAO,CAAC,sBAAsB;IAQ9B,OAAO,CAAC,gBAAgB;YAQV,KAAK;CAmBpB"}
1
+ {"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,iBAAiB,EAC7B,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,UAAU,EAEX,MAAM,8BAA8B,CAAC;AAGtC,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAU9C,MAAM,WAAW,OAAO;IACtB,UAAU,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,aAAa,CAAC,EAAE;QACd,qBAAqB,EAAE,OAAO,CAAC;QAC/B,uBAAuB,EAAE,MAAM,CAAC;KACjC,CAAC;IACF,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mGAAmG;IACnG,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAMD;;;;GAIG;AACH,qBAAa,QAAS,YAAW,iBAAiB;IAChD,OAAO,CAAC,SAAS,CAAC;IAClB,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,UAAU,CAAC;IACnB,OAAO,CAAC,aAAa,CAAC;IACtB,OAAO,CAAC,UAAU,CAAC;gBAEP,EACV,UAAU,EACV,UAAU,EACV,SAAS,EACT,aAAa,EACb,UAAU,GACX,EAAE,OAAO;IAWG,MAAM,CACjB,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC;YAiC5C,QAAQ;IAqBtB,OAAO,CAAC,sBAAsB;IAQ9B,OAAO,CAAC,gBAAgB;IAQxB,OAAO,CAAC,aAAa;IAIrB;;OAEG;YACW,eAAe;IA0B7B,yDAAyD;YAC3C,eAAe;YAaf,cAAc;YAKd,KAAK;CAmBpB"}
package/dist/importer.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { ImportFailed, ImportSuccessful, NotSupported, } from '@lde/sparql-importer';
2
2
  import { LastModifiedDownloader, } from '@lde/distribution-downloader';
3
- import { basename, dirname } from 'path';
4
- import { writeFile } from 'node:fs/promises';
3
+ import { basename, dirname, join } from 'path';
4
+ import { readFile, stat, writeFile } from 'node:fs/promises';
5
5
  const supportedFormats = new Map([
6
6
  ['application/n-triples', 'nt'],
7
7
  ['application/n-quads', 'nq'],
@@ -17,7 +17,8 @@ export class Importer {
17
17
  taskRunner;
18
18
  downloader;
19
19
  qleverOptions;
20
- constructor({ taskRunner, downloader, indexName, qleverOptions }) {
20
+ cacheIndex;
21
+ constructor({ taskRunner, downloader, indexName, qleverOptions, cacheIndex, }) {
21
22
  this.taskRunner = taskRunner;
22
23
  this.downloader = downloader ?? new LastModifiedDownloader();
23
24
  this.indexName = indexName ?? 'data';
@@ -25,6 +26,7 @@ export class Importer {
25
26
  'ascii-prefixes-only': true,
26
27
  'num-triples-per-batch': 100000,
27
28
  };
29
+ this.cacheIndex = cacheIndex ?? true;
28
30
  }
29
31
  async import(dataset) {
30
32
  const downloadDistributions = dataset
@@ -57,8 +59,13 @@ export class Importer {
57
59
  }
58
60
  async doImport(distribution) {
59
61
  const localFile = await this.downloader.download(distribution);
62
+ if (await this.isIndexUpToDate(localFile)) {
63
+ const tripleCount = await this.readTripleCount(localFile);
64
+ return new ImportSuccessful(distribution, undefined, tripleCount);
65
+ }
60
66
  const logs = await this.index(localFile, this.fileFormatFromMimeType(distribution.mimeType));
61
67
  const tripleCount = this.parseTripleCount(logs);
68
+ await this.writeCacheInfo(localFile);
62
69
  return new ImportSuccessful(distribution, undefined, tripleCount);
63
70
  }
64
71
  fileFormatFromMimeType(mimeType) {
@@ -75,6 +82,50 @@ export class Importer {
75
82
  const match = logs.match(/"num-triples":\{[^}]*"normal":(\d+)/);
76
83
  return match ? Number(match[1]) : undefined;
77
84
  }
85
+ cacheInfoPath(dataFile) {
86
+ return join(dirname(dataFile), `${this.indexName}.cache-info.json`);
87
+ }
88
+ /**
89
+ * Check whether the cached index is still up to date.
90
+ */
91
+ async isIndexUpToDate(dataFile) {
92
+ if (!this.cacheIndex)
93
+ return false;
94
+ let cacheInfo;
95
+ try {
96
+ const raw = await readFile(this.cacheInfoPath(dataFile), 'utf-8');
97
+ cacheInfo = JSON.parse(raw);
98
+ }
99
+ catch {
100
+ return false; // No cache marker — first run.
101
+ }
102
+ if (cacheInfo.sourceFile !== basename(dataFile)) {
103
+ return false; // Different dataset was last indexed.
104
+ }
105
+ const [cacheInfoStat, dataFileStat] = await Promise.all([
106
+ stat(this.cacheInfoPath(dataFile)),
107
+ stat(dataFile),
108
+ ]);
109
+ if (dataFileStat.mtimeMs > cacheInfoStat.mtimeMs) {
110
+ return false; // Data was re-downloaded.
111
+ }
112
+ return true;
113
+ }
114
+ /** Read the triple count from QLever's metadata file. */
115
+ async readTripleCount(dataFile) {
116
+ try {
117
+ const metadataPath = join(dirname(dataFile), `${this.indexName}.meta-data.json`);
118
+ const raw = await readFile(metadataPath, 'utf-8');
119
+ return this.parseTripleCount(raw);
120
+ }
121
+ catch {
122
+ return undefined;
123
+ }
124
+ }
125
+ async writeCacheInfo(dataFile) {
126
+ const info = { sourceFile: basename(dataFile) };
127
+ await writeFile(this.cacheInfoPath(dataFile), JSON.stringify(info));
128
+ }
78
129
  async index(file, format) {
79
130
  const settingsFile = 'index.settings.json';
80
131
  await writeFile(`${dirname(file)}/${settingsFile}`, JSON.stringify(this.qleverOptions));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/sparql-qlever",
3
- "version": "0.9.3",
3
+ "version": "0.10.0",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/sparql-qlever"