@lde/sparql-qlever 0.14.8 → 0.14.9

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/importer.js CHANGED
@@ -200,8 +200,9 @@ const nativeFormats = new Map([
200
200
  ]);
201
201
  /**
202
202
  * Accepted distribution media types, in preference order: the first match is
203
- * tried first. Native formats win over JSON-LD and RDF/XML because they skip
204
- * the Node-side preprocessor.
203
+ * tried first. Native formats win over JSON-LD, RDF/XML and TriG because they
204
+ * skip the Node-side preprocessor; TriG comes last so a streaming native dump
205
+ * is always preferred when a dataset offers both.
205
206
  *
206
207
  * `application/zip` is intentionally absent — the inner RDF format must be
207
208
  * declared via `mediaType` with `application/zip` appearing only as the
@@ -213,6 +214,7 @@ const acceptedMediaTypes = [
213
214
  'text/turtle',
214
215
  'application/ld+json',
215
216
  'application/rdf+xml',
217
+ 'application/trig',
216
218
  ];
217
219
  const defaultQleverIndexOptions = {
218
220
  'ascii-prefixes-only': true,
@@ -9,8 +9,8 @@ export interface PreprocessResult {
9
9
  * Whether a distribution needs Node-side preprocessing before `qlever-index`
10
10
  * can read it.
11
11
  *
12
- * JSON-LD and RDF/XML distributions return `true`: `qlever-index` cannot parse
13
- * either, so we stream them through `rdf-parse` into N-Quads first.
12
+ * JSON-LD, RDF/XML and TriG distributions return `true`: `qlever-index` cannot
13
+ * parse any of them, so we stream them through `rdf-parse` into N-Quads first.
14
14
  *
15
15
  * Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
16
16
  * `application/gzip` or `application/zip` — go straight through the shell
@@ -20,13 +20,13 @@ export interface PreprocessResult {
20
20
  */
21
21
  export declare function needsPreprocessing(distribution: Distribution): boolean;
22
22
  /**
23
- * Convert a JSON-LD or RDF/XML distribution to N-Quads alongside the source
24
- * file.
23
+ * Convert a JSON-LD, RDF/XML or TriG distribution to N-Quads alongside the
24
+ * source file.
25
25
  *
26
26
  * Streams the source through `rdf-parse` → `rdf-serialize` so memory use
27
27
  * stays bounded regardless of input size. Handles gzip transparently
28
28
  * (declared `compressFormat` or `.gz` filename) and zip containers (folds
29
- * each matching entry into the output stream in order).
29
+ * every parseable entry into the output stream in order).
30
30
  *
31
31
  * Cached: if the output is newer than the input, it is reused as-is.
32
32
  */
@@ -1 +1 @@
1
- {"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAmC5C,MAAM,WAAW,gBAAgB;IAC/B,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,IAAI,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAKtE;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAC9B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,gBAAgB,CAAC,CA2B3B"}
1
+ {"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AA6B5C,MAAM,WAAW,gBAAgB;IAC/B,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,IAAI,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAKtE;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAC9B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,gBAAgB,CAAC,CAyB3B"}
@@ -3,30 +3,31 @@ import { rdfSerializer } from 'rdf-serialize';
3
3
  import { createGunzip } from 'node:zlib';
4
4
  import { createReadStream, createWriteStream } from 'node:fs';
5
5
  import { rm, stat } from 'node:fs/promises';
6
- import { extname } from 'node:path';
7
6
  import { finished } from 'node:stream/promises';
8
7
  import { promisify } from 'node:util';
9
8
  import yauzl from 'yauzl';
10
9
  const JSONLD_MIME = 'application/ld+json';
11
10
  const RDFXML_MIME = 'application/rdf+xml';
11
+ const TRIG_MIME = 'application/trig';
12
12
  const ZIP_MIME = 'application/zip';
13
13
  const GZIP_MIME = 'application/gzip';
14
14
  const GZIP_MIME_LEGACY = 'application/x-gzip';
15
15
  /**
16
- * RDF media types that `qlever-index` cannot read natively, so they are streamed
17
- * through `rdf-parse` N-Quads first. The map key is the `rdf-parse`
18
- * `contentType`; the value carries the metadata needed to handle zip containers.
16
+ * RDF media types `qlever-index` cannot read natively, keyed by `rdf-parse`
17
+ * `contentType` with a label for warnings. TriG belongs here too: its
18
+ * `<graph> { }` blocks parse as neither N-Quads nor Turtle.
19
19
  */
20
20
  const preprocessFormats = new Map([
21
- [JSONLD_MIME, { label: 'JSON-LD', zipExtensions: ['.jsonld', '.json'] }],
22
- [RDFXML_MIME, { label: 'RDF/XML', zipExtensions: ['.rdf', '.xml', '.owl'] }],
21
+ [JSONLD_MIME, 'JSON-LD'],
22
+ [RDFXML_MIME, 'RDF/XML'],
23
+ [TRIG_MIME, 'TriG'],
23
24
  ]);
24
25
  /**
25
26
  * Whether a distribution needs Node-side preprocessing before `qlever-index`
26
27
  * can read it.
27
28
  *
28
- * JSON-LD and RDF/XML distributions return `true`: `qlever-index` cannot parse
29
- * either, so we stream them through `rdf-parse` into N-Quads first.
29
+ * JSON-LD, RDF/XML and TriG distributions return `true`: `qlever-index` cannot
30
+ * parse any of them, so we stream them through `rdf-parse` into N-Quads first.
30
31
  *
31
32
  * Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
32
33
  * `application/gzip` or `application/zip` — go straight through the shell
@@ -39,22 +40,20 @@ export function needsPreprocessing(distribution) {
39
40
  preprocessFormats.has(distribution.mimeType));
40
41
  }
41
42
  /**
42
- * Convert a JSON-LD or RDF/XML distribution to N-Quads alongside the source
43
- * file.
43
+ * Convert a JSON-LD, RDF/XML or TriG distribution to N-Quads alongside the
44
+ * source file.
44
45
  *
45
46
  * Streams the source through `rdf-parse` → `rdf-serialize` so memory use
46
47
  * stays bounded regardless of input size. Handles gzip transparently
47
48
  * (declared `compressFormat` or `.gz` filename) and zip containers (folds
48
- * each matching entry into the output stream in order).
49
+ * every parseable entry into the output stream in order).
49
50
  *
50
51
  * Cached: if the output is newer than the input, it is reused as-is.
51
52
  */
52
53
  export async function preprocess(localFile, distribution) {
53
54
  const contentType = distribution.mimeType;
54
- const format = contentType === undefined
55
- ? undefined
56
- : preprocessFormats.get(contentType);
57
- if (contentType === undefined || format === undefined) {
55
+ const label = contentType === undefined ? undefined : preprocessFormats.get(contentType);
56
+ if (contentType === undefined || label === undefined) {
58
57
  throw new Error(`preprocess called for distribution that does not need preprocessing: mediaType=${distribution.mimeType}`);
59
58
  }
60
59
  const outputFile = `${localFile}.preprocessed.nq`;
@@ -64,7 +63,7 @@ export async function preprocess(localFile, distribution) {
64
63
  await rm(outputFile, { force: true });
65
64
  const warnings = [];
66
65
  if (distribution.compressMimeType === ZIP_MIME) {
67
- await streamRdfZip(localFile, outputFile, contentType, format, warnings);
66
+ await streamRdfZip(localFile, outputFile, contentType, label, warnings);
68
67
  }
69
68
  else {
70
69
  await streamRdfFile(localFile, outputFile, contentType, distribution);
@@ -118,7 +117,12 @@ async function streamRdfFile(localFile, outputFile, contentType, distribution) {
118
117
  }
119
118
  }
120
119
  const openZip = promisify(yauzl.open);
121
- async function streamRdfZip(zipFile, outputFile, contentType, format, warnings) {
120
+ /**
121
+ * Fold every parseable entry of a zip into the N-Quads output, in order. The
122
+ * declared `contentType` drives the parser; an entry that fails to parse (a
123
+ * sidecar, OS metadata) is skipped with a warning. Throws if nothing parses.
124
+ */
125
+ async function streamRdfZip(zipFile, outputFile, contentType, label, warnings) {
122
126
  const zip = await openZip(zipFile, { lazyEntries: true });
123
127
  const output = createWriteStream(outputFile);
124
128
  let entriesProcessed = 0;
@@ -128,25 +132,25 @@ async function streamRdfZip(zipFile, outputFile, contentType, format, warnings)
128
132
  zip.on('end', resolve);
129
133
  zip.on('entry', (entry) => {
130
134
  void (async () => {
135
+ if (entry.fileName.endsWith('/')) {
136
+ zip.readEntry();
137
+ return;
138
+ }
131
139
  try {
132
- if (entry.fileName.endsWith('/')) {
133
- zip.readEntry();
134
- return;
140
+ const stream = await openZipEntry(zip, entry);
141
+ try {
142
+ await pipeRdfToWritable(stream, output, contentType);
143
+ entriesProcessed++;
135
144
  }
136
- const extension = extname(entry.fileName).toLowerCase();
137
- if (!format.zipExtensions.includes(extension)) {
138
- warnings.push(`Skipping zip entry ${entry.fileName}: extension ${extension || '(none)'} is not ${format.label}`);
139
- zip.readEntry();
140
- return;
145
+ finally {
146
+ // yauzl lazyEntries won't advance until this stream is released.
147
+ stream.destroy();
141
148
  }
142
- const stream = await openZipEntry(zip, entry);
143
- await pipeRdfToWritable(stream, output, contentType);
144
- entriesProcessed++;
145
- zip.readEntry();
146
149
  }
147
150
  catch (error) {
148
- reject(error);
151
+ warnings.push(`Skipping zip entry ${entry.fileName}: not valid ${label} (${error.message})`);
149
152
  }
153
+ zip.readEntry();
150
154
  })();
151
155
  });
152
156
  zip.readEntry();
@@ -157,7 +161,7 @@ async function streamRdfZip(zipFile, outputFile, contentType, format, warnings)
157
161
  await closeWritable(output);
158
162
  }
159
163
  if (entriesProcessed === 0) {
160
- throw new Error(`Zip ${zipFile} contains no ${format.label} entries`);
164
+ throw new Error(`Zip ${zipFile} contains no valid ${label} entries`);
161
165
  }
162
166
  }
163
167
  function openZipEntry(zip, entry) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/sparql-qlever",
3
- "version": "0.14.8",
3
+ "version": "0.14.9",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/sparql-qlever"
@@ -24,9 +24,9 @@
24
24
  "!**/*.tsbuildinfo"
25
25
  ],
26
26
  "dependencies": {
27
- "@lde/dataset": "0.7.6",
28
- "@lde/distribution-downloader": "0.6.4",
29
- "@lde/sparql-importer": "0.6.4",
27
+ "@lde/dataset": "0.7.7",
28
+ "@lde/distribution-downloader": "0.6.5",
29
+ "@lde/sparql-importer": "0.6.5",
30
30
  "@lde/sparql-server": "0.4.11",
31
31
  "@lde/task-runner": "0.2.11",
32
32
  "@lde/task-runner-docker": "0.2.13",