@lde/sparql-qlever 0.14.4 → 0.14.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/importer.js CHANGED
@@ -168,12 +168,26 @@ export class Importer {
168
168
  const metadataFile = `${this.options.indexName}.meta-data.json`;
169
169
  const localName = basename(file);
170
170
  const decompressCommand = localName.toLowerCase().endsWith('.zip')
171
- ? `unzip -p '${localName}'`
172
- : `(gunzip -c '${localName}' 2>/dev/null || cat '${localName}')`;
173
- const indexTask = await this.options.taskRunner.run(`${decompressCommand} | qlever-index ${flags} && cat ${metadataFile}`);
171
+ ? `unzip -p ${shellQuote(localName)}`
172
+ : `(gunzip -c ${shellQuote(localName)} 2>/dev/null || cat ${shellQuote(localName)})`;
173
+ const indexTask = await this.options.taskRunner.run(`${decompressCommand} | qlever-index ${flags} && cat ${shellQuote(metadataFile)}`);
174
174
  return await this.options.taskRunner.wait(indexTask);
175
175
  }
176
176
  }
177
+ /**
178
+ * POSIX-quote a value for safe interpolation into a shell command: wrap it in
179
+ * single quotes and escape any embedded single quote as `'\''`.
180
+ *
181
+ * Without this, a data filename containing an apostrophe — e.g. a dataset
182
+ * titled `'s-Hertogenbosch`, whose distribution URL maps to a local file like
183
+ * `…Erfgoed+'s-Hertogenbosch.nt` — would terminate the surrounding quotes, so
184
+ * `cat`/`gunzip` would read a non-existent path and feed `qlever-index` empty
185
+ * input. The index then "succeeds" with 0 triples, the import is treated as
186
+ * failed, and every distribution (and the JSON-LD fallback) fails the same way.
187
+ */
188
+ function shellQuote(value) {
189
+ return `'${value.replace(/'/g, "'\\''")}'`;
190
+ }
177
191
  /**
178
192
  * Native QLever index formats — `qlever-index -F <flag>` consumes these
179
193
  * directly. JSON-LD is not here: it is preprocessed to N-Quads first (see
@@ -186,8 +200,8 @@ const nativeFormats = new Map([
186
200
  ]);
187
201
  /**
188
202
  * Accepted distribution media types, in preference order: the first match is
189
- * tried first. Native formats win over JSON-LD because they skip the Node-side
190
- * preprocessor.
203
+ * tried first. Native formats win over JSON-LD and RDF/XML because they skip
204
+ * the Node-side preprocessor.
191
205
  *
192
206
  * `application/zip` is intentionally absent — the inner RDF format must be
193
207
  * declared via `mediaType` with `application/zip` appearing only as the
@@ -198,6 +212,7 @@ const acceptedMediaTypes = [
198
212
  'application/n-triples',
199
213
  'text/turtle',
200
214
  'application/ld+json',
215
+ 'application/rdf+xml',
201
216
  ];
202
217
  const defaultQleverIndexOptions = {
203
218
  'ascii-prefixes-only': true,
package/dist/preprocess.d.ts CHANGED
@@ -9,8 +9,8 @@ export interface PreprocessResult {
9
9
  * Whether a distribution needs Node-side preprocessing before `qlever-index`
10
10
  * can read it.
11
11
  *
12
- * Only JSON-LD distributions return `true`: `qlever-index` cannot parse
13
- * JSON-LD, so we stream it through a JSON-LD parser into N-Quads first.
12
+ * JSON-LD and RDF/XML distributions return `true`: `qlever-index` cannot parse
13
+ * either, so we stream them through `rdf-parse` into N-Quads first.
14
14
  *
15
15
  * Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
16
16
  * `application/gzip` or `application/zip` — go straight through the shell
@@ -20,12 +20,13 @@ export interface PreprocessResult {
20
20
  */
21
21
  export declare function needsPreprocessing(distribution: Distribution): boolean;
22
22
  /**
23
- * Convert a JSON-LD distribution to N-Quads alongside the source file.
23
+ * Convert a JSON-LD or RDF/XML distribution to N-Quads alongside the source
24
+ * file.
24
25
  *
25
26
  * Streams the source through `rdf-parse` → `rdf-serialize` so memory use
26
27
  * stays bounded regardless of input size. Handles gzip transparently
27
28
  * (declared `compressFormat` or `.gz` filename) and zip containers (folds
28
- * each JSON-LD entry into the output stream in order).
29
+ * each matching entry into the output stream in order).
29
30
  *
30
31
  * Cached: if the output is newer than the input, it is reused as-is.
31
32
  */
package/dist/preprocess.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAmB5C,MAAM,WAAW,gBAAgB;IAC/B,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,IAAI,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAEtE;AAED;;;;;;;;;GASG;AACH,wBAAsB,UAAU,CAC9B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,gBAAgB,CAAC,CAsB3B"}
1
+ {"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAmC5C,MAAM,WAAW,gBAAgB;IAC/B,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,IAAI,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAKtE;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAC9B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,gBAAgB,CAAC,CA2B3B"}
package/dist/preprocess.js CHANGED
@@ -8,16 +8,25 @@ import { finished } from 'node:stream/promises';
8
8
  import { promisify } from 'node:util';
9
9
  import yauzl from 'yauzl';
10
10
  const JSONLD_MIME = 'application/ld+json';
11
+ const RDFXML_MIME = 'application/rdf+xml';
11
12
  const ZIP_MIME = 'application/zip';
12
13
  const GZIP_MIME = 'application/gzip';
13
14
  const GZIP_MIME_LEGACY = 'application/x-gzip';
14
- const JSONLD_ZIP_EXTENSIONS = ['.jsonld', '.json'];
15
+ /**
16
+ * RDF media types that `qlever-index` cannot read natively, so they are streamed
17
+ * through `rdf-parse` → N-Quads first. The map key is the `rdf-parse`
18
+ * `contentType`; the value carries the metadata needed to handle zip containers.
19
+ */
20
+ const preprocessFormats = new Map([
21
+ [JSONLD_MIME, { label: 'JSON-LD', zipExtensions: ['.jsonld', '.json'] }],
22
+ [RDFXML_MIME, { label: 'RDF/XML', zipExtensions: ['.rdf', '.xml', '.owl'] }],
23
+ ]);
15
24
  /**
16
25
  * Whether a distribution needs Node-side preprocessing before `qlever-index`
17
26
  * can read it.
18
27
  *
19
- * Only JSON-LD distributions return `true`: `qlever-index` cannot parse
20
- * JSON-LD, so we stream it through a JSON-LD parser into N-Quads first.
28
+ * JSON-LD and RDF/XML distributions return `true`: `qlever-index` cannot parse
29
+ * either, so we stream them through `rdf-parse` into N-Quads first.
21
30
  *
22
31
  * Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
23
32
  * `application/gzip` or `application/zip` — go straight through the shell
@@ -26,20 +35,26 @@ const JSONLD_ZIP_EXTENSIONS = ['.jsonld', '.json'];
26
35
  * format must be declared.
27
36
  */
28
37
  export function needsPreprocessing(distribution) {
29
- return distribution.mimeType === JSONLD_MIME;
38
+ return (distribution.mimeType !== undefined &&
39
+ preprocessFormats.has(distribution.mimeType));
30
40
  }
31
41
  /**
32
- * Convert a JSON-LD distribution to N-Quads alongside the source file.
42
+ * Convert a JSON-LD or RDF/XML distribution to N-Quads alongside the source
43
+ * file.
33
44
  *
34
45
  * Streams the source through `rdf-parse` → `rdf-serialize` so memory use
35
46
  * stays bounded regardless of input size. Handles gzip transparently
36
47
  * (declared `compressFormat` or `.gz` filename) and zip containers (folds
37
- * each JSON-LD entry into the output stream in order).
48
+ * each matching entry into the output stream in order).
38
49
  *
39
50
  * Cached: if the output is newer than the input, it is reused as-is.
40
51
  */
41
52
  export async function preprocess(localFile, distribution) {
42
- if (!needsPreprocessing(distribution)) {
53
+ const contentType = distribution.mimeType;
54
+ const format = contentType === undefined
55
+ ? undefined
56
+ : preprocessFormats.get(contentType);
57
+ if (contentType === undefined || format === undefined) {
43
58
  throw new Error(`preprocess called for distribution that does not need preprocessing: mediaType=${distribution.mimeType}`);
44
59
  }
45
60
  const outputFile = `${localFile}.preprocessed.nq`;
@@ -49,10 +64,10 @@ export async function preprocess(localFile, distribution) {
49
64
  await rm(outputFile, { force: true });
50
65
  const warnings = [];
51
66
  if (distribution.compressMimeType === ZIP_MIME) {
52
- await streamJsonldZip(localFile, outputFile, warnings);
67
+ await streamRdfZip(localFile, outputFile, contentType, format, warnings);
53
68
  }
54
69
  else {
55
- await streamJsonldFile(localFile, outputFile, distribution);
70
+ await streamRdfFile(localFile, outputFile, contentType, distribution);
56
71
  }
57
72
  return { path: outputFile, format: 'nq', warnings };
58
73
  }
@@ -69,12 +84,12 @@ async function outputIsUpToDate(inputFile, outputFile) {
69
84
  }
70
85
  }
71
86
  /**
72
- * Pipe one JSON-LD source through parse → N-Quads serialize into an already
87
+ * Pipe one RDF source through parse → N-Quads serialize into an already
73
88
  * open writable, without closing it. Back-pressure is handled by Node's
74
89
  * built-in `.pipe()`; the caller manages `output`'s lifecycle.
75
90
  */
76
- async function pipeJsonldToWritable(input, output) {
77
- const quads = rdfParser.parse(input, { contentType: JSONLD_MIME });
91
+ async function pipeRdfToWritable(input, output, contentType) {
92
+ const quads = rdfParser.parse(input, { contentType });
78
93
  const bytes = rdfSerializer.serialize(quads, {
79
94
  contentType: 'application/n-quads',
80
95
  });
@@ -88,7 +103,7 @@ async function closeWritable(output) {
88
103
  output.end();
89
104
  });
90
105
  }
91
- async function streamJsonldFile(localFile, outputFile, distribution) {
106
+ async function streamRdfFile(localFile, outputFile, contentType, distribution) {
92
107
  const isGzipped = distribution.compressMimeType === GZIP_MIME ||
93
108
  distribution.compressMimeType === GZIP_MIME_LEGACY ||
94
109
  localFile.toLowerCase().endsWith('.gz');
@@ -96,14 +111,14 @@ async function streamJsonldFile(localFile, outputFile, distribution) {
96
111
  const input = isGzipped ? source.pipe(createGunzip()) : source;
97
112
  const output = createWriteStream(outputFile);
98
113
  try {
99
- await pipeJsonldToWritable(input, output);
114
+ await pipeRdfToWritable(input, output, contentType);
100
115
  }
101
116
  finally {
102
117
  await closeWritable(output);
103
118
  }
104
119
  }
105
120
  const openZip = promisify(yauzl.open);
106
- async function streamJsonldZip(zipFile, outputFile, warnings) {
121
+ async function streamRdfZip(zipFile, outputFile, contentType, format, warnings) {
107
122
  const zip = await openZip(zipFile, { lazyEntries: true });
108
123
  const output = createWriteStream(outputFile);
109
124
  let entriesProcessed = 0;
@@ -119,13 +134,13 @@ async function streamJsonldZip(zipFile, outputFile, warnings) {
119
134
  return;
120
135
  }
121
136
  const extension = extname(entry.fileName).toLowerCase();
122
- if (!JSONLD_ZIP_EXTENSIONS.includes(extension)) {
123
- warnings.push(`Skipping zip entry ${entry.fileName}: extension ${extension || '(none)'} is not JSON-LD`);
137
+ if (!format.zipExtensions.includes(extension)) {
138
+ warnings.push(`Skipping zip entry ${entry.fileName}: extension ${extension || '(none)'} is not ${format.label}`);
124
139
  zip.readEntry();
125
140
  return;
126
141
  }
127
142
  const stream = await openZipEntry(zip, entry);
128
- await pipeJsonldToWritable(stream, output);
143
+ await pipeRdfToWritable(stream, output, contentType);
129
144
  entriesProcessed++;
130
145
  zip.readEntry();
131
146
  }
@@ -142,7 +157,7 @@ async function streamJsonldZip(zipFile, outputFile, warnings) {
142
157
  await closeWritable(output);
143
158
  }
144
159
  if (entriesProcessed === 0) {
145
- throw new Error(`Zip ${zipFile} contains no JSON-LD entries`);
160
+ throw new Error(`Zip ${zipFile} contains no ${format.label} entries`);
146
161
  }
147
162
  }
148
163
  function openZipEntry(zip, entry) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/sparql-qlever",
3
- "version": "0.14.4",
3
+ "version": "0.14.6",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/sparql-qlever"
@@ -35,7 +35,7 @@
35
35
  "rdf-parse": "^5.0.0",
36
36
  "rdf-serialize": "^5.1.0",
37
37
  "tslib": "^2.3.0",
38
- "yauzl": "^3.2.0"
38
+ "yauzl": "^3.3.2"
39
39
  },
40
40
  "devDependencies": {
41
41
  "@rdfjs/types": "^2.0.0",